This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#Use the data sets folder fo setwd
setwd("C:/Users/Arash/Documents/Data sets")


library("plyr")
## Warning: package 'plyr' was built under R version 3.2.5
library("dplyr")
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("sqldf")
## Warning: package 'sqldf' was built under R version 3.2.5
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.2.5
## Loading required package: proto
## Warning: package 'proto' was built under R version 3.2.5
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.2.5
library("nFactors")
## Warning: package 'nFactors' was built under R version 3.2.5
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.2.5
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: psych
## Warning: package 'psych' was built under R version 3.2.5
## Loading required package: boot
## 
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
## 
##     logit
## Loading required package: lattice
## 
## Attaching package: 'lattice'
## The following object is masked from 'package:boot':
## 
##     melanoma
## 
## Attaching package: 'nFactors'
## The following object is masked from 'package:lattice':
## 
##     parallel
library("MASS")
library("psych")
library("ggmap")
## Warning: package 'ggmap' was built under R version 3.2.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.2.5
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library("ggplot2")
library("corrplot")
## Warning: package 'corrplot' was built under R version 3.2.5
library("lubridate")
## Warning: package 'lubridate' was built under R version 3.2.5
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
## 
##     here
## The following object is masked from 'package:base':
## 
##     date
library("reshape")
## Warning: package 'reshape' was built under R version 3.2.5
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
## 
##     stamp
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any
library("sqldf")
library("maps")
## Warning: package 'maps' was built under R version 3.2.5
## 
## Attaching package: 'maps'
## The following object is masked from 'package:plyr':
## 
##     ozone
library("zipcode")
## Warning: package 'zipcode' was built under R version 3.2.5
library("caret")
## Warning: package 'caret' was built under R version 3.2.5
library("rpart")
library("rpart.plot")
## Warning: package 'rpart.plot' was built under R version 3.2.5
library("cwhmisc")
## Warning: package 'cwhmisc' was built under R version 3.2.5
## Loading required package: grid
## 
## Attaching package: 'cwhmisc'
## The following object is masked from 'package:ggplot2':
## 
##     %+%
## The following object is masked from 'package:psych':
## 
##     %+%
library("rattle")
## Warning: package 'rattle' was built under R version 3.2.5
## Rattle: A free graphical interface for data mining with R.
## Version 4.1.0 Copyright (c) 2006-2015 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library("e1071")
## Warning: package 'e1071' was built under R version 3.2.5
library("broom")
library("randomForest")
## Warning: package 'randomForest' was built under R version 3.2.5
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:psych':
## 
##     outlier
## The following object is masked from 'package:dplyr':
## 
##     combine
library("nnet")
library("xtable")
## Warning: package 'xtable' was built under R version 3.2.5
library("visreg")
## Warning: package 'visreg' was built under R version 3.2.5
################# DATA PREPARAATION  ##########################


#combining and exploring data for group project
#exploration of data for group project
setwd("C:/Users/Arash/Documents/Data sets")
crime <- read.csv("Crime_2014.csv")
facilities <- read.csv("Facilities_by_Zipcode.csv")
home_sales <- read.csv("MC_Home_Sales_by_Zip_Code_2014.csv")
dropout <- read.csv("MCPS_Dropout_Attendance_by_Zipcode.csv")
irs <- read.csv("MC_IRS.csv")
Most_Data <- read.csv("Most_Data.csv")
data <- read.csv("Long_and_Foster_Columns_Removed.csv")

##### NOTE: Tthe followin part is the very first codes generated to clean and scrutizing data. it can be skipped######

#Make a table of number of incidents by zipcode using dplyr library
#it is sorting (TRUE) by most crime to least crime.

zip_code_tbl <- tbl_df(crime)
Incidents_by_zipcode <-zip_code_tbl %>% group_by(Zip.Code) %>% tally(sort = TRUE)
write.csv(Incidents_by_zipcode, "Crime_by_Zipcode_2014.csv")

#Rename the columns so they can be combined with Most_Data.csv
Incidents_by_zipcode$Zip <- Incidents_by_zipcode$Zip.Code
Incidents_by_zipcode$Number_of_Crimes_2014 <- Incidents_by_zipcode$n

#take out the original names of the columns and keep only the new ones
keeps <- c("Zip", "Number_of_Crimes_2014")
Incidents_by_zipcode <-Incidents_by_zipcode[keeps]

#counts the number of specific zipcodes, for example 20852
#length(which(crime$Zip.Code == 20852))

#same thing with the public facilities dataset
#facilities_tbl <- tbl_df(facilities)
#facilities_by_zipcode <-facilities_tbl %>% group_by(Zip) %>% tally(sort = TRUE)
#write.csv(facilities_by_zipcode, "Facilities_by_Zipcode.csv")


#try to combine facilities and crime
crime$Zip<-crime$Zip.Code

data_1 <- merge(crime, facilities, by = "Zip", all.x = TRUE, all.y = TRUE)

#add dropout
data_2 <- merge(data_1, dropout, by = "Zip", all.x = TRUE, all.y =TRUE)

#add irs
data_3 <- merge(data_2, irs, by = "Zip", all.x = TRUE, all.y = TRUE)


#combining the crime incidents 2014 with the other Most_Data_file
Most_Data_2014 <- merge(Incidents_by_zipcode, Most_Data, by = "Zip", all.x = TRUE, all.y = TRUE)
#write the file to csv
#write.csv(Most_Data_2014, "Most_Data_2014.csv")

###########

#cleaning Long_and_Foster housing sales dataset
#original file name Group_Project_L_F_Housing_Cleaning
#Natasha
data_1 <- read.csv("Long_and_Foster.csv")
Most_Data <- read.csv("Most_Data_2014.csv")

str(data_1)
## 'data.frame':    10894 obs. of  31 variables:
##  $ ML.                   : Factor w/ 9731 levels "MC7402458","MC7720901",..: 4168 2917 5 1368 3826 38 2427 3841 12 21 ...
##  $ City                  : Factor w/ 38 levels "ADELPHI","ASHTON",..: 10 4 31 4 4 4 4 10 31 10 ...
##  $ State                 : Factor w/ 1 level "MD": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Zip.4                 : int  4458 3026 NA 3065 4581 2258 3065 6660 1956 4208 ...
##  $ Zip.Code              : int  20815 20817 20854 20817 20817 20816 20817 20815 20854 20815 ...
##  $ List.Price            : num  8750000 7500000 4995000 4495000 4495000 ...
##  $ Original.List.Price   : num  8750000 5995000 5995000 4795000 4495000 ...
##  $ Close.Price           : num  8650000 7350000 4400000 4200000 4350000 4100000 4000000 3900000 3300000 3310000 ...
##  $ Advertised.Subdivision: Factor w/ 1594 levels "0","10101 GROSVENOR PARK COD",..: 1018 107 180 107 1088 514 107 708 1088 214 ...
##  $ Legal.Subdivision     : Factor w/ 1318 levels "","10101 GROSVENOR PARK COD",..: 855 92 154 92 912 441 92 606 912 194 ...
##  $ Status                : Factor w/ 1 level "SOLD": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Close.Date            : Factor w/ 312 levels "1/1/2014","1/10/2014",..: 155 205 150 205 218 118 213 211 172 168 ...
##  $ DOMM                  : int  0 19 524 106 10 243 26 31 442 294 ...
##  $ DOMP                  : int  0 343 524 106 10 243 26 31 442 294 ...
##  $ Baths.All             : int  4 9 13 11 10 8 7 8 10 8 ...
##  $ Baths.Half            : int  0 2 3 1 3 1 1 2 2 2 ...
##  $ Baths.Full            : int  4 7 10 10 7 7 6 6 8 6 ...
##  $ Bedrooms              : int  4 6 9 8 5 5 5 6 5 6 ...
##  $ Condo.Coop.Fee        : num  5010 NA NA NA NA NA NA NA NA NA ...
##  $ Cooling               : Factor w/ 290 levels "Air Purification System, Ceiling Fan(s), Central Air Conditioning, ENERGY STAR Cooling System, Heat Pump(s), Programmable Therm"| __truncated__,..: 218 68 199 281 199 199 138 138 199 199 ...
##  $ Dining.Kitchen        : Factor w/ 2339 levels "2nd Kitchen",..: 2181 328 11 190 190 1293 190 1947 190 190 ...
##  $ Farm                  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Fireplaces            : int  1 5 5 5 6 4 4 5 3 3 ...
##  $ Heating               : Factor w/ 399 levels "90% Forced Air",..: 170 110 110 251 110 241 251 170 251 49 ...
##  $ HOA                   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ HOA.Fee               : num  NA NA NA NA NA NA NA NA 747 NA ...
##  $ Lot.Sqft              : int  NA 124058 90169 42974 87120 35658 30755 18481 217800 11583 ...
##  $ Total.Square.Footage  : int  3400 0 18500 12000 11783 5116 8800 0 8500 0 ...
##  $ Townhouse.Type        : Factor w/ 9 levels "","Detached",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Type                  : Factor w/ 16 levels "Attach/Row Hse",..: 7 3 3 3 3 3 3 3 3 3 ...
##  $ Parking               : Factor w/ 1149 levels "Additional Storage Area, Gen Comm Elem, Unassigned",..: 575 819 575 575 488 399 769 355 575 874 ...
data_1 <- data_1[!is.na(data_1$Zip.Code),]
#this dataset is clean, there were no na fields in zipcode


glimpse(data_1)
## Observations: 10,894
## Variables: 31
## $ ML.                    <fctr> MC8320294, MC8291029, MC7919334, MC824...
## $ City                   <fctr> CHEVY CHASE, BETHESDA, POTOMAC, BETHES...
## $ State                  <fctr> MD, MD, MD, MD, MD, MD, MD, MD, MD, MD...
## $ Zip.4                  <int> 4458, 3026, NA, 3065, 4581, 2258, 3065,...
## $ Zip.Code               <int> 20815, 20817, 20854, 20817, 20817, 2081...
## $ List.Price             <dbl> 8750000, 7500000, 4995000, 4495000, 449...
## $ Original.List.Price    <dbl> 8750000, 5995000, 5995000, 4795000, 449...
## $ Close.Price            <dbl> 8650000, 7350000, 4400000, 4200000, 435...
## $ Advertised.Subdivision <fctr> PARC SOMERSET CODM, BRADLEY HILLS GROV...
## $ Legal.Subdivision      <fctr> PARC SOMERSET CODM, BRADLEY HILLS GROV...
## $ Status                 <fctr> SOLD, SOLD, SOLD, SOLD, SOLD, SOLD, SO...
## $ Close.Date             <fctr> 4/15/2014, 6/12/2014, 4/1/2014, 6/12/2...
## $ DOMM                   <int> 0, 19, 524, 106, 10, 243, 26, 31, 442, ...
## $ DOMP                   <int> 0, 343, 524, 106, 10, 243, 26, 31, 442,...
## $ Baths.All              <int> 4, 9, 13, 11, 10, 8, 7, 8, 10, 8, 9, 6,...
## $ Baths.Half             <int> 0, 2, 3, 1, 3, 1, 1, 2, 2, 2, 2, 2, 0, ...
## $ Baths.Full             <int> 4, 7, 10, 10, 7, 7, 6, 6, 8, 6, 7, 4, 7...
## $ Bedrooms               <int> 4, 6, 9, 8, 5, 5, 5, 6, 5, 6, 6, 6, 4, ...
## $ Condo.Coop.Fee         <dbl> 5010, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ Cooling                <fctr> Heat Pump(s), Ceiling Fan(s), Central ...
## $ Dining.Kitchen         <fctr> Sep Dining Rm, Breakfast Room, Gourmet...
## $ Farm                   <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALS...
## $ Fireplaces             <int> 1, 5, 5, 5, 6, 4, 4, 5, 3, 3, 6, 5, 4, ...
## $ Heating                <fctr> Forced Air, Central, Forced Air, Zoned...
## $ HOA                    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALS...
## $ HOA.Fee                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 747.00,...
## $ Lot.Sqft               <int> NA, 124058, 90169, 42974, 87120, 35658,...
## $ Total.Square.Footage   <int> 3400, 0, 18500, 12000, 11783, 5116, 880...
## $ Townhouse.Type         <fctr> , , , , , , , , , , , , , , , , , , , ...
## $ Type                   <fctr> Hi-Rise 9+ Floors, Detached, Detached,...
## $ Parking                <fctr> Garage, Garage, Paved Driveway, Garage...
#this will list all of the levels in a column

df1 = data_1
factor1 <- sqldf("select distinct Type as 'Type' from df1")
## Loading required package: tcltk
## Warning: Quoted identifiers should have class SQL, use DBI::SQL() if the
## caller performs the quoting.
factor1 
##                   Type
## 1    Hi-Rise 9+ Floors
## 2             Detached
## 3            Townhouse
## 4     House of Worship
## 5       Attach/Row Hse
## 6           Patio Home
## 7        Semi-Detached
## 8    Garden 1-4 Floors
## 9  Mid-Rise 5-8 Floors
## 10              Duplex
## 11               Other
## 12   Dwelling w/Rental
## 13        Back-to-Back
## 14        Multi-Family
## 15           Penthouse
## 16                Quad
#lists the number of times each level appears in a column (in this case Type is the column)
set.seed(1)
data_1 %>% 
  group_by(Type) %>%
  summarise(no_rows = length(Type))
## # A tibble: 16 × 2
##                   Type no_rows
##                 <fctr>   <int>
## 1       Attach/Row Hse     160
## 2         Back-to-Back      29
## 3             Detached    6301
## 4               Duplex      18
## 5    Dwelling w/Rental       2
## 6    Garden 1-4 Floors     963
## 7    Hi-Rise 9+ Floors     694
## 8     House of Worship       2
## 9  Mid-Rise 5-8 Floors     114
## 10        Multi-Family       9
## 11               Other      23
## 12          Patio Home      49
## 13           Penthouse       2
## 14                Quad       2
## 15       Semi-Detached      29
## 16           Townhouse    2497
#same thing to find how many sales in each zipcode in 2014
set.seed(1)
number_of_sales_by_zip<- data_1 %>% 
  group_by(Zip.Code) %>%
  summarise(no_rows = length(Zip.Code))

#Rename the columns so they can be combined with Most_Data.csv
number_of_sales_by_zip$Zip <- number_of_sales_by_zip$Zip.Code
number_of_sales_by_zip$Number_of_Sales_2014 <- number_of_sales_by_zip$no_rows

#take out the original names of the columns and keep only the new ones
keeps <- c("Zip", "Number_of_Sales_2014")
Sales_by_zipcode <-number_of_sales_by_zip[keeps]

#combining the number of sales 2014 with the other Most_Data_file
Most_Data_2014 <- merge(number_of_sales_by_zip, Most_Data, by = "Zip", all.x = TRUE, all.y = TRUE)
row.has.na <- apply(Most_Data_2014, 1, function(x){any(is.na(x))})






# aggregating the different crime rates by state
crime<-read.csv("crime-original.csv")
b=matrix(0,55, 89-23+2)
for  (i in 24:90)
  b[,i-22]  <- (aggregate(crime[,i]~Zip.Code,data = crime,FUN = "sum"))$`crime[, i]`
# 68 crime types for 55 states




#some modification on Crime file in excel( the new crime file is (crime-final))
crime_f<-read.csv("crime-final.csv")
#some modification on most_data_2014 in excel( the new file is most_data_2014)
#some modification on Crime file in excel( the new crime file is (crime-final))
crime_f<-read.csv("crime-final.csv")
#most_data
merge1<-read.csv("merge.csv")
housing<-data
colnames(facilities)[1]="Zip.Code"
data_1 <- merge(crime_f, facilities, by = "Zip.Code", all.x = TRUE, all.y = TRUE)
data_2 <- merge(data_1,merge1, by = "Zip.Code", all.x = TRUE, all.y = TRUE)
data_3 <- merge(data_2,housing, by = "Zip.Code", all.x = TRUE, all.y = TRUE)
data_4<-data_3[-c(1,2100,8099),]



#Normalizing crimes
a=data_4$IRS_Estimated_Population_2014
data_4[,2:24]<-apply(data_4[,2:24],2,FUN = function(x) x/a)

#The final data to work on
main=data_4



#removing NAs, Classify based on prices
nn= is.na(main$community_facilities_count)
main=main[!nn,]
main$price_dif= log(main$Close.Price) -log(main$Original.List.Price)
main$class=cut(main$Close.Price,c(0,300000,650000,8650000),c(1:3))
main$Date.Quarter=cut(main$Date.Quarter,c(0,1,2,3,4),c(1:4))
#nn= is.na(main$Lot.Sqft)
#main=main[!nn,]




############################ Crime Analysis (by Nibret)###########################################

summary(crime_f)
##     Zip.Code     ROB.FIREARM...STREET AGG.ASSLT.FIREARM.CITIZEN
##  Min.   :20812   Min.   : 0.00        Min.   : 0.00            
##  1st Qu.:20842   1st Qu.: 0.00        1st Qu.: 0.50            
##  Median :20866   Median : 7.00        Median : 4.00            
##  Mean   :20865   Mean   :15.02        Mean   : 9.14            
##  3rd Qu.:20891   3rd Qu.:22.50        3rd Qu.:11.50            
##  Max.   :20912   Max.   :82.00        Max.   :38.00            
##  BURG.FORCE.RES.NIGHT LARCENY.PICK.POCKET AUTO.THEFT...PASSENGER.VEHICLE
##  Min.   :  0.00       Min.   :   0.0      Min.   : 0.00                 
##  1st Qu.: 12.00       1st Qu.:  32.0      1st Qu.: 2.00                 
##  Median : 46.00       Median : 175.0      Median : 7.00                 
##  Mean   : 55.91       Mean   : 290.6      Mean   :18.88                 
##  3rd Qu.: 95.50       3rd Qu.: 482.5      3rd Qu.:35.50                 
##  Max.   :222.00       Max.   :1132.0      Max.   :79.00                 
##  ASSAULT...BATTERY...CITIZEN VANDALISM.MOTOR.VEHICLE
##  Min.   :  0.00              Min.   :  0.0          
##  1st Qu.:  7.50              1st Qu.: 10.0          
##  Median : 39.00              Median : 49.0          
##  Mean   : 64.63              Mean   : 65.3          
##  3rd Qu.: 95.00              3rd Qu.:117.0          
##  Max.   :222.00              Max.   :219.0          
##  WEAPON.POSSESSION.HANDGUN SEX.OFFENSE...SEX..ASSAULT      drug       
##  Min.   : 0.000            Min.   : 0.000             Min.   :  0.00  
##  1st Qu.: 0.000            1st Qu.: 0.000             1st Qu.:  6.50  
##  Median : 5.000            Median : 3.000             Median : 52.00  
##  Mean   : 7.791            Mean   : 4.791             Mean   : 96.91  
##  3rd Qu.:11.500            3rd Qu.: 8.500             3rd Qu.:152.00  
##  Max.   :33.000            Max.   :21.000             Max.   :396.00  
##  FAMILY.OFFENSE...ABUSE.CHILD JUVENILE.RUNAWAY
##  Min.   : 0.000               Min.   : 0.0    
##  1st Qu.: 1.000               1st Qu.: 0.0    
##  Median : 5.000               Median : 5.0    
##  Mean   : 6.233               Mean   :12.3    
##  3rd Qu.: 8.500               3rd Qu.:19.5    
##  Max.   :24.000               Max.   :56.0    
##  LIQUOR...UNLAWFUL.POSS.UNDER.21 DISORDERLY.CONDUCT
##  Min.   :  0.00                  Min.   :  0.00    
##  1st Qu.:  1.00                  1st Qu.:  0.00    
##  Median :  8.00                  Median :  6.00    
##  Mean   : 26.12                  Mean   : 22.26    
##  3rd Qu.: 26.50                  3rd Qu.: 22.50    
##  Max.   :207.00                  Max.   :189.00    
##  SUICIDE...POISON.OVERDOSE LITTERING.TRASH.DUMPING  TRESPASSING    
##  Min.   : 0.000            Min.   : 0.000          Min.   :  0.00  
##  1st Qu.: 1.000            1st Qu.: 0.000          1st Qu.:  0.00  
##  Median : 6.000            Median : 0.000          Median :  2.00  
##  Mean   : 6.674            Mean   : 1.116          Mean   : 10.79  
##  3rd Qu.:10.500            3rd Qu.: 1.000          3rd Qu.: 14.00  
##  Max.   :27.000            Max.   :13.000          Max.   :129.00  
##  HARASSMENT.STALKING DRIVING.UNDER.THE.INFLUENCE   FIRE.OTHER   
##  Min.   : 0.000      Min.   :  0.00              Min.   : 0.00  
##  1st Qu.: 0.000      1st Qu.:  5.00              1st Qu.: 1.00  
##  Median : 2.000      Median : 36.00              Median : 7.00  
##  Mean   : 2.465      Mean   : 74.91              Mean   :11.12  
##  3rd Qu.: 4.000      3rd Qu.:151.00              3rd Qu.:16.50  
##  Max.   :10.000      Max.   :246.00              Max.   :43.00  
##  POL.INFORMATION  LOST.PROPERTY    RECOVERED.PROPERTY.MONT..CO.
##  Min.   :  0.00   Min.   :  0.00   Min.   : 0.00               
##  1st Qu.: 10.50   1st Qu.:  3.50   1st Qu.: 1.00               
##  Median : 44.00   Median : 17.00   Median : 7.00               
##  Mean   : 60.09   Mean   : 36.56   Mean   :13.28               
##  3rd Qu.: 85.50   3rd Qu.: 57.50   3rd Qu.:21.50               
##  Max.   :262.00   Max.   :208.00   Max.   :91.00
##Focusing on Mean

crime.mean <- lapply(crime_f, mean, na.rm = TRUE)

print(crime.mean) 
## $Zip.Code
## [1] 20864.53
## 
## $ROB.FIREARM...STREET
## [1] 15.02326
## 
## $AGG.ASSLT.FIREARM.CITIZEN
## [1] 9.139535
## 
## $BURG.FORCE.RES.NIGHT
## [1] 55.90698
## 
## $LARCENY.PICK.POCKET
## [1] 290.6279
## 
## $AUTO.THEFT...PASSENGER.VEHICLE
## [1] 18.88372
## 
## $ASSAULT...BATTERY...CITIZEN
## [1] 64.62791
## 
## $VANDALISM.MOTOR.VEHICLE
## [1] 65.30233
## 
## $WEAPON.POSSESSION.HANDGUN
## [1] 7.790698
## 
## $SEX.OFFENSE...SEX..ASSAULT
## [1] 4.790698
## 
## $drug
## [1] 96.90698
## 
## $FAMILY.OFFENSE...ABUSE.CHILD
## [1] 6.232558
## 
## $JUVENILE.RUNAWAY
## [1] 12.30233
## 
## $LIQUOR...UNLAWFUL.POSS.UNDER.21
## [1] 26.11628
## 
## $DISORDERLY.CONDUCT
## [1] 22.25581
## 
## $SUICIDE...POISON.OVERDOSE
## [1] 6.674419
## 
## $LITTERING.TRASH.DUMPING
## [1] 1.116279
## 
## $TRESPASSING
## [1] 10.7907
## 
## $HARASSMENT.STALKING
## [1] 2.465116
## 
## $DRIVING.UNDER.THE.INFLUENCE
## [1] 74.90698
## 
## $FIRE.OTHER
## [1] 11.11628
## 
## $POL.INFORMATION
## [1] 60.09302
## 
## $LOST.PROPERTY
## [1] 36.55814
## 
## $RECOVERED.PROPERTY.MONT..CO.
## [1] 13.27907
##Crime Correlation

crime_corelation <- cor(crime_f)


corrplot(crime_corelation, method="circle")

##Crime by Month

data = read.csv("date-crime.csv")
df <- data.frame(data)
dat.m <- melt(df,id.vars = "Month")

ggplot(dat.m, aes(x = Month, y = value, fill=variable)) +  
  geom_bar(stat='identity') + guides(fill=FALSE) + scale_x_discrete(breaks = 1:12, labels=c("Jan","foo","bar","baz","phi","fum", "Jul", "Aug", "Sept", "Oct", "Nov", "Dec"))

##########################Maps for crime and housing prices( By Ashley)##############################
data(zipcode)
sale_count <- main
sale_count$Zip.Code<- clean.zipcodes(sale_count$Zip.Code)
#combine current dataset with zipcode dataset
sale_count<- merge(sale_count, zipcode, by.x='Zip.Code', by.y='zip')
#group housing sale count by zip code
density<- ddply(sale_count, .(Zip.Code), "nrow")
names(density)[2] <- "count"
#combine current dataset with 'count' field
Sale <- merge(sale_count, density)
#remove duplicates (only show unique zip codes)
Sale<-Sale[!duplicated(Sale$Zip.Code),]

#map of housing sales in Montgomery County
moco <- get_map("montgomery county")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=montgomery+county&zoom=10&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=montgomery%20county&sensor=false
moco_map <- ggmap(moco)
moco_housing <- moco_map + stat_density2d(aes(x = longitude, y = latitude, fill = ..level..,
                                              alpha = ..level..),
                                          bins = 4, data = Sale,
                                          geom = "polygon") + xlim(-77.5,-76.8) + ylim(38.9,39.3) + labs(title="House Sales By Zip Code", x="Longitude", y="Latitude")
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
moco_housing
## Warning: Removed 1 rows containing missing values (geom_rect).

#CRIME COUNTS (using just the crime dataset, since each row is a crime incident)

crime_count <- read.csv("crime_2014.csv", header=TRUE, sep=",")
crime_count$Zip.Code<- clean.zipcodes(crime_count$Zip.Code)
#combine current dataset with zipcode dataset
crime_count<- merge(crime_count, zipcode, by.x='Zip.Code', by.y='zip')
#group crime count by zip code
density<- ddply(crime_count, .(Zip.Code), "nrow")
names(density)[2] <- "count"
#combine current dataset with 'count' field
Crime <- merge(crime_count, density)
#remove duplicates (only show unique zip codes)
Crime<-Crime[!duplicated(Crime$Zip.Code),]

#map of crime counts in Montgomery County
moco_crime <- moco_map + stat_density2d(aes(x = longitude, y = latitude, fill = ..level..,
                                            alpha = ..level..),
                                        bins = 4, data = Crime,
                                        geom = "polygon") + xlim(-77.5,-76.8) + ylim(38.9,39.3) + labs(title="Crime Counts By Zip Code", x="Longitude", y="Latitude")
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
## Scale for 'y' is already present. Adding another scale for 'y', which
## will replace the existing scale.
moco_crime
## Warning: Removed 2 rows containing non-finite values (stat_density2d).
## Warning: Removed 1 rows containing missing values (geom_rect).

#MEDIAN HOUSING SALES
dataset <- main
main$Zip.Code<- clean.zipcodes(main$Zip.Code)
#combine current dataset with zipcode dataset
median_sale<- merge(main, zipcode, by.x='Zip.Code', by.y='zip')
#group median price by zip code
density<- ddply(median_sale, .(Zip.Code), "nrow")
names(density)[2] <- "count"
#combine current dataset with 'count' field
Median <- merge(median_sale, density)
#remove duplicates (only show unique zip codes)
Median<-Median[!duplicated(Median$Zip.Code),]

#map of median price in Montgomery County
moco_median <- moco_map + stat_bin2d(
  aes(x = longitude, y = latitude, colour = Median_Sales,
      fill = Median_Sales),
  size = 0.25, bins = 20, alpha = 0.5,
  data = Median) + labs(title="Median Housing Prices By Zip Code", x="Longitude", y="Latitude")
moco_median

################ CLASSIFICATION MODELS and two additiona regression models(By Arash)#######################

#training and testing data
index=nrow(main)
index2=sample(index, round(index/5))
train=main[-index2,]
test=main[index2,]





#SVM

base1 <- sum(test$class == 2) / nrow(test)
results <- data.frame(model=c("MFC"), score=c(base1))

#performane function
performance1 = function (M,df,name){
  
  pr=predict(M,test)
  ac=confusionMatrix(pr,test$class)$overal[1]
  df <- rbind(df, data.frame(model=c(name), score=ac) )
  return(df)
}

#+ number of bedrooms

train1=train[,c(45,54)]
M1= svm(class~.,data = train1)


#+number of bathrooms
train1=train[,c(45,44,54)]
M2= svm(class~.,data = train1)


#+type of house
train1=train[,c(45,44,50,54)]
M3= svm(class~.,data = train1)


#+garage

train1=train[,c(45,44,50,52,54)]
M4= svm(class~.,data = train1)

#+total crime
train1=train[,c(27,45,44,50,52,54)]
M5= svm(class~.,data = train1)


#+facilies
train1=train[,c(25,27,45,44,50,52,54)]
M6= svm(class~.,data = train1)


#+crimes in details
train1=train[,c(2:24,25,27,45,44,50,52,54)]
M7= svm(class~.,data = train1)



results<-performance1(M1,results,"+number of bedrooms")
results<-performance1(M2,results,"+number of bathrooms")
results<-performance1(M3,results,"+type of house")
results<-performance1(M4,results,"+garage")
results<-performance1(M5,results,"+total crime")
results<-performance1(M6,results,"+facilies")
results<-performance1(M7,results,"+crime in details")


#The result of feature engineering for model accuracy

results
##                          model     score
## 1                          MFC 0.4979253
## Accuracy   +number of bedrooms 0.6274781
## Accuracy1 +number of bathrooms 0.6597510
## Accuracy2       +type of house 0.6860304
## Accuracy3              +garage 0.7123098
## Accuracy4         +total crime 0.7183034
## Accuracy5            +facilies 0.7349009
## Accuracy6    +crime in details 0.8363301
# AS we can see the detailed crime give an additional 10 % accuracy


#Error Analysis: contingency table
test$pr=predict(M7,test)
confusionMatrix(test$pr,test$class)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 424  81   0
##          2  83 928 120
##          3   0  71 462
## 
## Overall Statistics
##                                           
##                Accuracy : 0.8363          
##                  95% CI : (0.8201, 0.8517)
##     No Information Rate : 0.4979          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.736           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8363   0.8593   0.7938
## Specificity            0.9513   0.8136   0.9553
## Pos Pred Value         0.8396   0.8205   0.8668
## Neg Pred Value         0.9501   0.8536   0.9267
## Prevalence             0.2337   0.4979   0.2683
## Detection Rate         0.1955   0.4278   0.2130
## Detection Prevalence   0.2328   0.5214   0.2457
## Balanced Accuracy      0.8938   0.8364   0.8745
#Decision tree

#using the final model obtained by feature engineering by a decision model and gain the accuracy
train1=train[,c(2:24,25,27,45,44,50,52,54)]
M8= rpart(class~.,data = train1,method = "class",parms=list(split="information"),
          control=rpart.control(usesurrogate=0,  maxsurrogate=0))
fancyRpartPlot(M8)

test$pr=predict(M8,test,type = "class")
c(Accuracy=mean(test$pr==test$class))
##  Accuracy 
## 0.8003688
#Error Analysis: contingency table
confusionMatrix(test$pr,test$class)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 405  97   0
##          2 102 894 145
##          3   0  89 437
## 
## Overall Statistics
##                                          
##                Accuracy : 0.8004         
##                  95% CI : (0.7829, 0.817)
##     No Information Rate : 0.4979         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.6774         
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.7988   0.8278   0.7509
## Specificity            0.9416   0.7732   0.9439
## Pos Pred Value         0.8068   0.7835   0.8308
## Neg Pred Value         0.9388   0.8191   0.9117
## Prevalence             0.2337   0.4979   0.2683
## Detection Rate         0.1867   0.4122   0.2015
## Detection Prevalence   0.2314   0.5260   0.2425
## Balanced Accuracy      0.8702   0.8005   0.8474
#Random forest
#Using Random forrest as an improvised version of Decision Trees
M9 <- randomForest(class ~ . , data = train1)
test$pr=predict(M9,test,type = "class")
c(Accuracy=mean(test$pr==test$class))
##  Accuracy 
## 0.8497003
confusionMatrix(test$pr,test$class)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 435  71   0
##          2  71 938 112
##          3   1  71 470
## 
## Overall Statistics
##                                          
##                Accuracy : 0.8497         
##                  95% CI : (0.834, 0.8645)
##     No Information Rate : 0.4979         
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.758          
##  Mcnemar's Test P-Value : 0.01705        
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8580   0.8685   0.8076
## Specificity            0.9573   0.8320   0.9546
## Pos Pred Value         0.8597   0.8368   0.8672
## Neg Pred Value         0.9567   0.8645   0.9312
## Prevalence             0.2337   0.4979   0.2683
## Detection Rate         0.2006   0.4325   0.2167
## Detection Prevalence   0.2333   0.5168   0.2499
## Balanced Accuracy      0.9076   0.8502   0.8811
#Multinomial logisitc regression



train1=train[,c(2:24,25,27,45,44,50,52,54)]
test1=test[,c(2:24,25,27,45,44,50,52,54)]
M10=multinom(class~.,data=train1)
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7123.458885
## iter  20 value 5901.535115
## iter  30 value 5190.619980
## iter  40 value 4733.458334
## iter  50 value 4468.519069
## iter  60 value 3897.529378
## iter  70 value 3834.239842
## iter  80 value 3805.719500
## iter  90 value 3758.113407
## iter 100 value 3715.500857
## final  value 3715.500857 
## stopped after 100 iterations
summary(M10)
## Call:
## multinom(formula = class ~ ., data = train1)
## 
## Coefficients:
##   (Intercept) ROB.FIREARM...STREET AGG.ASSLT.FIREARM.CITIZEN
## 2   -7.135703           -213.75731                 -99.34253
## 3  -16.302249             17.51108                -154.08095
##   BURG.FORCE.RES.NIGHT LARCENY.PICK.POCKET AUTO.THEFT...PASSENGER.VEHICLE
## 2            -144.7899            212.2837                      231.20507
## 3             159.0209            479.3184                      -98.78393
##   ASSAULT...BATTERY...CITIZEN VANDALISM.MOTOR.VEHICLE
## 2                    -187.477               -270.1950
## 3                   -1353.464                205.1457
##   WEAPON.POSSESSION.HANDGUN SEX.OFFENSE...SEX..ASSAULT       drug
## 2                  26.01334                   168.1376  -573.8341
## 3                -278.20594                  -212.9966 -1386.4735
##   FAMILY.OFFENSE...ABUSE.CHILD JUVENILE.RUNAWAY
## 2                     2.423389         126.1159
## 3                  -246.872710        -648.3282
##   LIQUOR...UNLAWFUL.POSS.UNDER.21 DISORDERLY.CONDUCT
## 2                        107.6619          -431.0789
## 3                       -465.9298           293.9566
##   SUICIDE...POISON.OVERDOSE LITTERING.TRASH.DUMPING TRESPASSING
## 2                 -69.27394                12.49951    87.00731
## 3                  60.80521               -30.81818   -28.40027
##   HARASSMENT.STALKING DRIVING.UNDER.THE.INFLUENCE FIRE.OTHER
## 2           -29.86632                    125.0875 -14.404768
## 3            99.37772                   -311.4888   7.409725
##   POL.INFORMATION LOST.PROPERTY RECOVERED.PROPERTY.MONT..CO.
## 2        376.7549      80.24806                    248.39233
## 3        651.7004     425.63089                     75.55131
##   community_facilities_count Number_of_Crimes_2014  Bedrooms Baths.All
## 2               -0.015218174          0.0002911417 0.5840812  1.435588
## 3                0.005355814          0.0011295990 0.8755583  2.574569
##   Type.yBack-to-Back Type.yDetached Type.yDuplex Type.yDwelling w/Rental
## 2          -29.44206       3.866120     -0.16771                19.13971
## 3          -10.58724       5.548716    -16.26789               -13.35261
##   Type.yGarden 1-4 Floors Type.yHi-Rise 9+ Floors Type.yHouse of Worship
## 2               0.3754771              -0.3850387              23.167452
## 3              -0.2276385              -0.3488163              -4.445758
##   Type.yMid-Rise 5-8 Floors Type.yMulti-Family Type.yOther
## 2                 1.3832287         -1.5808008   0.8849762
## 3                 0.4042231         -0.0937052   0.5258798
##   Type.yPatio Home Type.yPenthouse Type.yQuad Type.ySemi-Detached
## 2        0.7332469      35.3289469 -15.812324           0.4324783
## 3        2.3358770      -0.8937585  -1.038738           1.2834926
##   Type.yTownhouse Has.GarageTRUE
## 2       0.3647558       1.984681
## 3       0.1220644       3.459350
## 
## Std. Errors:
##   (Intercept) ROB.FIREARM...STREET AGG.ASSLT.FIREARM.CITIZEN
## 2  0.03711233         1.713773e-05              1.442724e-05
## 3  0.01027058         8.740511e-06              6.355311e-06
##   BURG.FORCE.RES.NIGHT LARCENY.PICK.POCKET AUTO.THEFT...PASSENGER.VEHICLE
## 2         1.056690e-04        0.0008601332                   2.100343e-05
## 3         3.762445e-05        0.0003568732                   1.149117e-05
##   ASSAULT...BATTERY...CITIZEN VANDALISM.MOTOR.VEHICLE
## 2                4.130097e-05            9.567034e-05
## 3                1.220721e-05            3.952020e-05
##   WEAPON.POSSESSION.HANDGUN SEX.OFFENSE...SEX..ASSAULT         drug
## 2              8.192552e-06               2.753739e-06 6.989767e-05
## 3              2.906017e-06               1.453405e-06 2.964995e-05
##   FAMILY.OFFENSE...ABUSE.CHILD JUVENILE.RUNAWAY
## 2                 6.407365e-06     5.554892e-06
## 3                 3.224107e-06     1.217695e-06
##   LIQUOR...UNLAWFUL.POSS.UNDER.21 DISORDERLY.CONDUCT
## 2                    4.095796e-05       4.834967e-05
## 3                    1.803187e-05       2.709065e-05
##   SUICIDE...POISON.OVERDOSE LITTERING.TRASH.DUMPING  TRESPASSING
## 2              1.468976e-05            2.017818e-06 1.510837e-05
## 3              5.998123e-06            1.376145e-06 7.562140e-06
##   HARASSMENT.STALKING DRIVING.UNDER.THE.INFLUENCE   FIRE.OTHER
## 2        6.529590e-06                2.208542e-04 1.862995e-05
## 3        2.612887e-06                7.449833e-05 5.584414e-06
##   POL.INFORMATION LOST.PROPERTY RECOVERED.PROPERTY.MONT..CO.
## 2    6.244082e-05  1.199309e-04                 3.292426e-05
## 3    1.504916e-05  4.048773e-05                 1.150863e-05
##   community_facilities_count Number_of_Crimes_2014   Bedrooms  Baths.All
## 2                0.007503704          5.321357e-05 0.03901113 0.04347633
## 3                0.011160954          6.934534e-05 0.04915004 0.04913822
##   Type.yBack-to-Back Type.yDetached Type.yDuplex Type.yDwelling w/Rental
## 2       1.209654e-16     0.05233219 2.019629e-04            1.865602e-13
## 3       3.135699e-10     0.04321516 1.042249e-11            6.672444e-18
##   Type.yGarden 1-4 Floors Type.yHi-Rise 9+ Floors Type.yHouse of Worship
## 2             0.041166218              0.05485336           8.439325e-15
## 3             0.004123571              0.01764819           9.859520e-17
##   Type.yMid-Rise 5-8 Floors Type.yMulti-Family  Type.yOther
## 2              0.0018346724       0.0001945807 0.0005766579
## 3              0.0008557773       0.0001943774 0.0002634886
##   Type.yPatio Home Type.yPenthouse   Type.yQuad Type.ySemi-Detached
## 2     0.0009542987    1.609874e-19 7.759981e-13        0.0011475491
## 3     0.0001490200    8.687959e-22 1.591584e-09        0.0009517327
##   Type.yTownhouse Has.GarageTRUE
## 2      0.05229843     0.05013745
## 3      0.04036205     0.03938284
## 
## Residual Deviance: 7431.002 
## AIC: 7607.002
#tidy(M10)

pr1=predict(M10,test1,"probs")
# A sample of Class predicted probabilities for some records
head(pr1)
##                 1          2            3
## 2892 1.197786e-02 0.82612312 1.618990e-01
## 4048 1.004290e-03 0.27800720 7.209885e-01
## 6246 2.221725e-04 0.50038952 4.993883e-01
## 9911 9.067750e-01 0.09319558 2.939987e-05
## 2200 1.091404e-06 0.05158395 9.484150e-01
## 9803 8.630302e-01 0.13695430 1.545830e-05
test1$pr=apply(pr1,1,which.max)
c(Accuracy=mean(test1$pr==test1$class))
##  Accuracy 
## 0.7999078
confusionMatrix(test$pr,test$class)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 435  71   0
##          2  71 938 112
##          3   1  71 470
## 
## Overall Statistics
##                                          
##                Accuracy : 0.8497         
##                  95% CI : (0.834, 0.8645)
##     No Information Rate : 0.4979         
##     P-Value [Acc > NIR] : < 2e-16        
##                                          
##                   Kappa : 0.758          
##  Mcnemar's Test P-Value : 0.01705        
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8580   0.8685   0.8076
## Specificity            0.9573   0.8320   0.9546
## Pos Pred Value         0.8597   0.8368   0.8672
## Neg Pred Value         0.9567   0.8645   0.9312
## Prevalence             0.2337   0.4979   0.2683
## Detection Rate         0.2006   0.4325   0.2167
## Detection Prevalence   0.2333   0.5168   0.2499
## Balanced Accuracy      0.9076   0.8502   0.8811
#Cross-validation method for Average multninomial logit accuracy

Accuracy=numeric(30)
for ( i in 1:30){
  
  index=nrow(main)
  index2=sample(index, round(index/5))
  train=main[-index2,]
  test=main[index2,]
  
  train1=train[,c(2:24,25,27,45,44,50,52,54)]
  test1=test[,c(2:24,25,27,45,44,50,52,54)]
  
  M10=multinom(class~.,data=train1)
  pr1=predict(M10,test1,"probs")
  test1$pr=apply(pr1,1,which.max)
  Accuracy[i]=mean(test1$pr==test1$class)
}
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7105.839697
## iter  20 value 6023.496175
## iter  30 value 5445.020137
## iter  40 value 5007.965309
## iter  50 value 4749.463400
## iter  60 value 4244.785823
## iter  70 value 4186.857940
## iter  80 value 4138.058929
## iter  90 value 4118.467423
## iter 100 value 4082.469492
## final  value 4082.469492 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7135.014947
## iter  20 value 6059.680516
## iter  30 value 5475.088847
## iter  40 value 5023.842596
## iter  50 value 4738.586909
## iter  60 value 4337.107049
## iter  70 value 4185.268935
## iter  80 value 4123.814479
## iter  90 value 4086.519351
## iter 100 value 4067.653114
## final  value 4067.653114 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7208.317916
## iter  20 value 6005.214442
## iter  30 value 5464.227204
## iter  40 value 5018.584316
## iter  50 value 4751.177540
## iter  60 value 4225.319946
## iter  70 value 4174.280911
## iter  80 value 4143.157977
## iter  90 value 4097.287034
## iter 100 value 4058.421959
## final  value 4058.421959 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7166.782192
## iter  20 value 6028.409496
## iter  30 value 5467.404732
## iter  40 value 5003.532293
## iter  50 value 4762.222360
## iter  60 value 4258.096296
## iter  70 value 4195.468185
## iter  80 value 4134.634417
## iter  90 value 4091.730016
## iter 100 value 4061.128377
## final  value 4061.128377 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7022.418962
## iter  20 value 5997.342246
## iter  30 value 5472.542457
## iter  40 value 5023.248985
## iter  50 value 4738.037520
## iter  60 value 4269.994764
## iter  70 value 4180.103675
## iter  80 value 4128.367869
## iter  90 value 4096.169057
## iter 100 value 4070.347236
## final  value 4070.347236 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7202.614986
## iter  20 value 6010.096109
## iter  30 value 5426.072320
## iter  40 value 4982.700701
## iter  50 value 4685.328886
## iter  60 value 4186.092120
## iter  70 value 4130.509278
## iter  80 value 4116.043807
## iter  90 value 4066.545502
## iter 100 value 4023.034098
## final  value 4023.034098 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7140.846723
## iter  20 value 6217.601798
## iter  30 value 5466.521834
## iter  40 value 4990.748137
## iter  50 value 4770.106584
## iter  60 value 4263.999393
## iter  70 value 4227.458309
## iter  80 value 4175.355555
## iter  90 value 4150.559695
## iter 100 value 4108.548111
## final  value 4108.548111 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7236.175801
## iter  20 value 6069.491718
## iter  30 value 5464.039860
## iter  40 value 5018.690733
## iter  50 value 4769.447439
## iter  60 value 4218.373407
## iter  70 value 4156.000673
## iter  80 value 4139.063770
## iter  90 value 4086.610972
## iter 100 value 4052.170363
## final  value 4052.170363 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7190.349366
## iter  20 value 5730.693508
## iter  30 value 5076.572623
## iter  40 value 4617.835915
## iter  50 value 4346.226195
## iter  60 value 3799.541337
## iter  70 value 3743.762615
## iter  80 value 3682.870004
## iter  90 value 3625.771219
## iter 100 value 3571.836094
## final  value 3571.836094 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7273.558162
## iter  20 value 6112.751289
## iter  30 value 5474.007824
## iter  40 value 5022.617767
## iter  50 value 4765.617697
## iter  60 value 4274.133519
## iter  70 value 4192.081064
## iter  80 value 4136.698999
## iter  90 value 4106.209165
## iter 100 value 4077.344476
## final  value 4077.344476 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7174.034784
## iter  20 value 6106.183351
## iter  30 value 5483.298746
## iter  40 value 5021.305274
## iter  50 value 4754.061972
## iter  60 value 4250.456333
## iter  70 value 4213.615577
## iter  80 value 4156.869602
## iter  90 value 4106.622201
## iter 100 value 4067.507285
## final  value 4067.507285 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7261.751908
## iter  20 value 6045.948425
## iter  30 value 5477.426182
## iter  40 value 5025.821033
## iter  50 value 4748.324486
## iter  60 value 4419.279502
## iter  70 value 4187.946854
## iter  80 value 4141.722413
## iter  90 value 4106.162089
## iter 100 value 4064.314214
## final  value 4064.314214 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7080.283786
## iter  20 value 6058.801428
## iter  30 value 5482.469334
## iter  40 value 5024.469111
## iter  50 value 4772.232679
## iter  60 value 4299.205513
## iter  70 value 4209.622877
## iter  80 value 4150.647096
## iter  90 value 4102.727909
## iter 100 value 4079.964787
## final  value 4079.964787 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7087.466181
## iter  20 value 5962.590047
## iter  30 value 5307.836619
## iter  40 value 4875.993200
## iter  50 value 4602.081792
## iter  60 value 4038.053169
## iter  70 value 3974.354781
## iter  80 value 3944.183305
## iter  90 value 3916.760358
## iter 100 value 3878.188579
## final  value 3878.188579 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7200.227027
## iter  20 value 6077.972104
## iter  30 value 5509.781949
## iter  40 value 5049.285797
## iter  50 value 4792.212236
## iter  60 value 4261.471543
## iter  70 value 4233.531850
## iter  80 value 4179.810943
## iter  90 value 4132.787544
## iter 100 value 4097.884576
## final  value 4097.884576 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7856.316274
## iter  20 value 6102.301223
## iter  30 value 5398.324846
## iter  40 value 4968.335143
## iter  50 value 4681.658270
## iter  60 value 4093.152759
## iter  70 value 4043.341653
## iter  80 value 4022.443177
## iter  90 value 3980.886936
## iter 100 value 3951.918703
## final  value 3951.918703 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7804.713747
## iter  20 value 6188.629836
## iter  30 value 5363.594779
## iter  40 value 4909.150939
## iter  50 value 4625.239873
## iter  60 value 4080.241645
## iter  70 value 4040.213520
## iter  80 value 3984.152458
## iter  90 value 3953.513140
## iter 100 value 3916.372736
## final  value 3916.372736 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7188.549569
## iter  20 value 5904.022514
## iter  30 value 5360.103007
## iter  40 value 4924.909673
## iter  50 value 4650.108939
## iter  60 value 4089.192050
## iter  70 value 4031.149680
## iter  80 value 4010.681316
## iter  90 value 3969.927233
## iter 100 value 3923.423301
## final  value 3923.423301 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7375.872033
## iter  20 value 6090.585100
## iter  30 value 5467.543515
## iter  40 value 4980.042136
## iter  50 value 4734.314456
## iter  60 value 4195.292645
## iter  70 value 4168.339358
## iter  80 value 4106.998503
## iter  90 value 4063.648565
## iter 100 value 4050.394481
## final  value 4050.394481 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7033.906586
## iter  20 value 5828.231196
## iter  30 value 5123.018317
## iter  40 value 4623.393673
## iter  50 value 4362.988424
## iter  60 value 3807.197740
## iter  70 value 3766.607725
## iter  80 value 3706.908738
## iter  90 value 3649.413003
## iter 100 value 3599.811434
## final  value 3599.811434 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7127.284728
## iter  20 value 6012.426576
## iter  30 value 5407.461383
## iter  40 value 4976.596076
## iter  50 value 4730.332076
## iter  60 value 4198.119005
## iter  70 value 4152.836119
## iter  80 value 4115.832669
## iter  90 value 4070.209195
## iter 100 value 4042.579084
## final  value 4042.579084 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7073.150645
## iter  20 value 5793.294560
## iter  30 value 5130.347001
## iter  40 value 4691.391994
## iter  50 value 4410.077179
## iter  60 value 4113.440869
## iter  70 value 3801.304935
## iter  80 value 3737.824758
## iter  90 value 3678.121273
## iter 100 value 3634.774243
## final  value 3634.774243 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7225.487861
## iter  20 value 6053.719431
## iter  30 value 5215.611917
## iter  40 value 4698.567276
## iter  50 value 4451.473067
## iter  60 value 4109.102026
## iter  70 value 3855.275404
## iter  80 value 3833.125433
## iter  90 value 3794.294053
## iter 100 value 3738.240342
## final  value 3738.240342 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7153.641837
## iter  20 value 6085.890561
## iter  30 value 5443.420213
## iter  40 value 5038.606641
## iter  50 value 4752.814594
## iter  60 value 4310.620702
## iter  70 value 4214.795374
## iter  80 value 4171.238959
## iter  90 value 4126.806942
## iter 100 value 4107.981210
## final  value 4107.981210 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7042.298483
## iter  20 value 6192.491305
## iter  30 value 5418.478648
## iter  40 value 4983.261262
## iter  50 value 4716.474210
## iter  60 value 4181.572706
## iter  70 value 4125.474975
## iter  80 value 4097.046307
## iter  90 value 4063.649070
## iter 100 value 4028.598656
## final  value 4028.598656 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7200.214615
## iter  20 value 6022.668539
## iter  30 value 5360.896200
## iter  40 value 4948.762893
## iter  50 value 4689.223030
## iter  60 value 4184.011941
## iter  70 value 4130.646980
## iter  80 value 4087.982500
## iter  90 value 4036.372147
## iter 100 value 3996.775720
## final  value 3996.775720 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7094.502291
## iter  20 value 6151.584977
## iter  30 value 5502.131229
## iter  40 value 5039.699109
## iter  50 value 4766.266733
## iter  60 value 4591.709306
## iter  70 value 4277.711178
## iter  80 value 4162.506920
## iter  90 value 4112.279841
## iter 100 value 4067.416895
## final  value 4067.416895 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7097.860746
## iter  20 value 6015.157372
## iter  30 value 5481.868758
## iter  40 value 5020.102096
## iter  50 value 4745.198321
## iter  60 value 4289.080117
## iter  70 value 4137.829026
## iter  80 value 4081.524922
## iter  90 value 4030.484729
## iter 100 value 3991.372766
## final  value 3991.372766 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7268.362291
## iter  20 value 6055.808026
## iter  30 value 5503.086476
## iter  40 value 5053.442152
## iter  50 value 4789.741391
## iter  60 value 4419.993241
## iter  70 value 4227.328984
## iter  80 value 4184.518989
## iter  90 value 4150.726732
## iter 100 value 4115.177652
## final  value 4115.177652 
## stopped after 100 iterations
## # weights:  135 (88 variable)
## initial  value 9529.362992 
## iter  10 value 7681.985262
## iter  20 value 6057.289778
## iter  30 value 5409.816375
## iter  40 value 4946.592599
## iter  50 value 4671.682045
## iter  60 value 4133.695070
## iter  70 value 4087.186213
## iter  80 value 4034.487235
## iter  90 value 3980.387336
## iter 100 value 3943.950631
## final  value 3943.950631 
## stopped after 100 iterations
mean(Accuracy)
## [1] 0.8175042
################ Reression for price difference #########################
#add a variable "price_dif" which is the difference of original price and closing price
#The goal is to predict the reduction or increase in hosuing price in the market based on its attributes
# log of price is used because of its large amount

main$Median_Sales=as.numeric(main$Median_Sales)
index=nrow(main)
index2=sample(index, round(index/5))
train=main[-index2,]
test=main[index2,]

#effect of crime
train1=train[,c(2:24,53)]
ml1=glm(price_dif~.,data = train1)
summary(ml1)
## 
## Call:
## glm(formula = price_dif ~ ., data = train1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.76270  -0.02206   0.01200   0.03285   0.40470  
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -0.035262   0.006923  -5.093 3.59e-07 ***
## ROB.FIREARM...STREET             -2.420737   5.326994  -0.454  0.64953    
## AGG.ASSLT.FIREARM.CITIZEN        16.961650  11.562539   1.467  0.14243    
## BURG.FORCE.RES.NIGHT              4.768273   2.352337   2.027  0.04269 *  
## LARCENY.PICK.POCKET               1.324213   0.314893   4.205 2.63e-05 ***
## AUTO.THEFT...PASSENGER.VEHICLE   -9.702350   6.653880  -1.458  0.14484    
## ASSAULT...BATTERY...CITIZEN     -10.661660   3.342767  -3.189  0.00143 ** 
## VANDALISM.MOTOR.VEHICLE           1.477270   2.053873   0.719  0.47200    
## WEAPON.POSSESSION.HANDGUN        18.591372  11.392240   1.632  0.10273    
## SEX.OFFENSE...SEX..ASSAULT       18.631188  14.259604   1.307  0.19139    
## drug                              1.217763   1.449388   0.840  0.40082    
## FAMILY.OFFENSE...ABUSE.CHILD    -10.109937  11.205304  -0.902  0.36695    
## JUVENILE.RUNAWAY                  8.292250   5.914022   1.402  0.16091    
## LIQUOR...UNLAWFUL.POSS.UNDER.21   0.185191   3.911979   0.047  0.96224    
## DISORDERLY.CONDUCT                0.648324   2.023401   0.320  0.74866    
## SUICIDE...POISON.OVERDOSE        -2.132457  13.272665  -0.161  0.87236    
## LITTERING.TRASH.DUMPING         -71.657984  21.999301  -3.257  0.00113 ** 
## TRESPASSING                       5.139563   5.322225   0.966  0.33423    
## HARASSMENT.STALKING             -69.408313  18.130170  -3.828  0.00013 ***
## DRIVING.UNDER.THE.INFLUENCE       0.163945   1.087251   0.151  0.88015    
## FIRE.OTHER                       -1.413327  13.841924  -0.102  0.91868    
## POL.INFORMATION                   1.337901   1.228669   1.089  0.27623    
## LOST.PROPERTY                    -7.285919   2.546216  -2.861  0.00423 ** 
## RECOVERED.PROPERTY.MONT..CO.      6.764239   4.123758   1.640  0.10098    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.003611934)
## 
##     Null deviance: 31.592  on 8672  degrees of freedom
## Residual deviance: 31.240  on 8649  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: -24134
## 
## Number of Fisher Scoring iterations: 2
#eefect of housing attributes
train1=train[,c(25,27,31,36,40,44,45,50,52,53)]
ml1=glm(price_dif~.-Original.List.Price-Median_Sales+log(Original.List.Price)+log(Median_Sales),data = train1)
summary(ml1)
## 
## Call:
## glm(formula = price_dif ~ . - Original.List.Price - Median_Sales + 
##     log(Original.List.Price) + log(Median_Sales), data = train1)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.75500  -0.02270   0.01059   0.03099   0.40618  
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 1.478e-01  2.297e-02   6.437 1.29e-10 ***
## community_facilities_count -2.644e-04  1.167e-04  -2.265 0.023539 *  
## Number_of_Crimes_2014       1.350e-07  7.811e-07   0.173 0.862809    
## Date.Quarter2               1.433e-02  1.940e-03   7.386 1.66e-13 ***
## Date.Quarter3               3.126e-03  2.085e-03   1.499 0.133941    
## Date.Quarter4              -5.806e-03  2.149e-03  -2.702 0.006906 ** 
## Baths.All                  -7.261e-04  4.564e-04  -1.591 0.111670    
## Bedrooms                   -5.977e-04  9.497e-04  -0.629 0.529171    
## Type.yBack-to-Back         -1.153e-02  1.343e-02  -0.859 0.390508    
## Type.yDetached             -1.980e-03  5.518e-03  -0.359 0.719771    
## Type.yDuplex               -2.907e-02  1.789e-02  -1.625 0.104288    
## Type.yDwelling w/Rental    -1.231e-01  4.251e-02  -2.897 0.003782 ** 
## Type.yGarden 1-4 Floors    -2.587e-02  5.806e-03  -4.456 8.44e-06 ***
## Type.yHi-Rise 9+ Floors    -3.180e-02  6.043e-03  -5.261 1.46e-07 ***
## Type.yMid-Rise 5-8 Floors  -2.616e-02  8.276e-03  -3.161 0.001576 ** 
## Type.yMulti-Family         -1.353e-02  2.297e-02  -0.589 0.556009    
## Type.yOther                -4.800e-02  1.458e-02  -3.293 0.000997 ***
## Type.yPatio Home           -1.377e-04  1.067e-02  -0.013 0.989696    
## Type.yPenthouse            -5.087e-02  4.215e-02  -1.207 0.227545    
## Type.yQuad                 -4.121e-02  4.216e-02  -0.977 0.328386    
## Type.ySemi-Detached        -1.541e-04  1.297e-02  -0.012 0.990520    
## Type.yTownhouse             3.384e-03  5.473e-03   0.618 0.536412    
## Has.GarageTRUE              1.880e-03  1.485e-03   1.266 0.205610    
## log(Original.List.Price)   -1.425e-02  1.956e-03  -7.286 3.47e-13 ***
## log(Median_Sales)           4.952e-03  9.623e-04   5.146 2.72e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.003490452)
## 
##     Null deviance: 31.592  on 8672  degrees of freedom
## Residual deviance: 30.185  on 8648  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: -24430
## 
## Number of Fisher Scoring iterations: 2
tidy(ml1)
##                          term      estimate    std.error   statistic
## 1                 (Intercept)  1.478356e-01 2.296758e-02  6.43670859
## 2  community_facilities_count -2.644223e-04 1.167438e-04 -2.26497929
## 3       Number_of_Crimes_2014  1.349851e-07 7.811420e-07  0.17280478
## 4               Date.Quarter2  1.432851e-02 1.939992e-03  7.38585991
## 5               Date.Quarter3  3.125864e-03 2.085463e-03  1.49888271
## 6               Date.Quarter4 -5.805675e-03 2.148684e-03 -2.70196835
## 7                   Baths.All -7.260913e-04 4.564046e-04 -1.59089393
## 8                    Bedrooms -5.976500e-04 9.497090e-04 -0.62929804
## 9          Type.yBack-to-Back -1.153305e-02 1.343020e-02 -0.85873985
## 10             Type.yDetached -1.979723e-03 5.517978e-03 -0.35877688
## 11               Type.yDuplex -2.906914e-02 1.789330e-02 -1.62458292
## 12    Type.yDwelling w/Rental -1.231304e-01 4.250939e-02 -2.89654632
## 13    Type.yGarden 1-4 Floors -2.587230e-02 5.805587e-03 -4.45644878
## 14    Type.yHi-Rise 9+ Floors -3.179547e-02 6.043154e-03 -5.26140305
## 15  Type.yMid-Rise 5-8 Floors -2.616366e-02 8.276083e-03 -3.16135739
## 16         Type.yMulti-Family -1.352679e-02 2.297341e-02 -0.58880225
## 17                Type.yOther -4.800006e-02 1.457847e-02 -3.29252991
## 18           Type.yPatio Home -1.377493e-04 1.066568e-02 -0.01291520
## 19            Type.yPenthouse -5.087148e-02 4.215425e-02 -1.20679355
## 20                 Type.yQuad -4.121091e-02 4.216269e-02 -0.97742621
## 21        Type.ySemi-Detached -1.541212e-04 1.297047e-02 -0.01188246
## 22            Type.yTownhouse  3.383860e-03 5.473084e-03  0.61827302
## 23             Has.GarageTRUE  1.880142e-03 1.485310e-03  1.26582428
## 24   log(Original.List.Price) -1.425253e-02 1.956122e-03 -7.28611518
## 25          log(Median_Sales)  4.952213e-03 9.623255e-04  5.14608958
##         p.value
## 1  1.285869e-10
## 2  2.353859e-02
## 3  8.628089e-01
## 4  1.655346e-13
## 5  1.339406e-01
## 6  6.906480e-03
## 7  1.116700e-01
## 8  5.291706e-01
## 9  3.905079e-01
## 10 7.197708e-01
## 11 1.042879e-01
## 12 3.782410e-03
## 13 8.437757e-06
## 14 1.463862e-07
## 15 1.575780e-03
## 16 5.560093e-01
## 17 9.968877e-04
## 18 9.896957e-01
## 19 2.275447e-01
## 20 3.283855e-01
## 21 9.905197e-01
## 22 5.364117e-01
## 23 2.056101e-01
## 24 3.470364e-13
## 25 2.718178e-07
ml2=xtable(ml1)
write.csv(ml2,"23.csv")




######################Regression for Days in Market
#The goal is to build a regression model to predict how long a certain house will be in Market before being sold

main$Median_Sales=as.numeric(main$Median_Sales)
index=nrow(main)
index2=sample(index, round(index/5))
train=main[-index2,]
test=main[index2,]

train1=train[,c(36,40,44,45,50,52,43)]
ml1=lm(DOMP~.-Original.List.Price+log(Original.List.Price),data = train1)
#ml2=step(ml1,direction = "both",scope = list(lower=DOMP~1,upper=DOMP~.),k=3)
summary(ml1)
## 
## Call:
## lm(formula = DOMP ~ . - Original.List.Price + log(Original.List.Price), 
##     data = train1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -337.80  -36.22  -20.03   12.89  865.59 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -107.2065    22.6302  -4.737 2.20e-06 ***
## Date.Quarter2              -19.6676     2.1704  -9.062  < 2e-16 ***
## Date.Quarter3              -13.8429     2.3216  -5.963 2.58e-09 ***
## Date.Quarter4                1.3023     2.3949   0.544  0.58662    
## Baths.All                    2.6765     0.5102   5.246 1.59e-07 ***
## Bedrooms                     2.6131     1.0411   2.510  0.01209 *  
## Type.yBack-to-Back          20.8632    15.1461   1.377  0.16841    
## Type.yDetached              12.9059     6.3847   2.021  0.04327 *  
## Type.yDuplex                30.2235    17.7212   1.705  0.08814 .  
## Type.yDwelling w/Rental    -13.9455    47.6832  -0.292  0.76994    
## Type.yGarden 1-4 Floors     30.3590     6.7039   4.529 6.02e-06 ***
## Type.yHi-Rise 9+ Floors     38.1127     6.9583   5.477 4.44e-08 ***
## Type.yHouse of Worship     -39.7240    66.6380  -0.596  0.55111    
## Type.yMid-Rise 5-8 Floors   37.7133     9.2661   4.070 4.74e-05 ***
## Type.yMulti-Family           1.7928    30.3172   0.059  0.95285    
## Type.yOther                 21.6248    18.2179   1.187  0.23526    
## Type.yPatio Home             5.1864    13.3213   0.389  0.69704    
## Type.yPenthouse            142.4972    47.3342   3.010  0.00262 ** 
## Type.yQuad                  56.7905    66.6152   0.853  0.39395    
## Type.ySemi-Detached         20.3164    15.1529   1.341  0.18003    
## Type.yTownhouse              8.9375     6.3335   1.411  0.15824    
## Has.GarageTRUE               5.3143     1.6535   3.214  0.00131 ** 
## log(Original.List.Price)     9.9225     1.8302   5.421 6.07e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 66.31 on 8651 degrees of freedom
## Multiple R-squared:  0.04977,    Adjusted R-squared:  0.04735 
## F-statistic: 20.59 on 22 and 8651 DF,  p-value: < 2.2e-16
tidy(ml1)
##                         term    estimate  std.error   statistic
## 1                (Intercept) -107.206530 22.6301732 -4.73732696
## 2              Date.Quarter2  -19.667567  2.1704108 -9.06167963
## 3              Date.Quarter3  -13.842911  2.3216164 -5.96261779
## 4              Date.Quarter4    1.302255  2.3949307  0.54375496
## 5                  Baths.All    2.676525  0.5102187  5.24583909
## 6                   Bedrooms    2.613084  1.0410786  2.50997730
## 7         Type.yBack-to-Back   20.863189 15.1461445  1.37745874
## 8             Type.yDetached   12.905937  6.3847395  2.02137259
## 9               Type.yDuplex   30.223452 17.7212388  1.70549318
## 10   Type.yDwelling w/Rental  -13.945487 47.6832022 -0.29246121
## 11   Type.yGarden 1-4 Floors   30.358961  6.7038788  4.52856651
## 12   Type.yHi-Rise 9+ Floors   38.112699  6.9582626  5.47732981
## 13    Type.yHouse of Worship  -39.724007 66.6379837 -0.59611658
## 14 Type.yMid-Rise 5-8 Floors   37.713275  9.2661011  4.07002626
## 15        Type.yMulti-Family    1.792797 30.3172394  0.05913457
## 16               Type.yOther   21.624833 18.2178637  1.18701253
## 17          Type.yPatio Home    5.186374 13.3213271  0.38932861
## 18           Type.yPenthouse  142.497159 47.3342201  3.01044696
## 19                Type.yQuad   56.790479 66.6152141  0.85251514
## 20       Type.ySemi-Detached   20.316445 15.1529301  1.34076019
## 21           Type.yTownhouse    8.937468  6.3334762  1.41114729
## 22            Has.GarageTRUE    5.314255  1.6535164  3.21391149
## 23  log(Original.List.Price)    9.922493  1.8302322  5.42143969
##         p.value
## 1  2.200052e-06
## 2  1.566354e-19
## 3  2.579601e-09
## 4  5.866241e-01
## 5  1.592566e-07
## 6  1.209200e-02
## 7  1.684061e-01
## 8  4.327194e-02
## 9  8.813838e-02
## 10 7.699410e-01
## 11 6.018017e-06
## 12 4.439015e-08
## 13 5.511129e-01
## 14 4.742603e-05
## 15 9.528463e-01
## 16 2.352553e-01
## 17 6.970427e-01
## 18 2.616161e-03
## 19 3.939518e-01
## 20 1.800335e-01
## 21 1.582371e-01
## 22 1.314204e-03
## 23 6.071356e-08
ml2=xtable(ml1)
write.csv(ml2,"results1.csv")
visreg(ml1)

########################## Regression models for housing price and PCA ( By Natasha)########################

main=data_4
#change the name of main so this works with my code
full <- main
names(full)
##  [1] "Zip.Code"                            
##  [2] "ROB.FIREARM...STREET"                
##  [3] "AGG.ASSLT.FIREARM.CITIZEN"           
##  [4] "BURG.FORCE.RES.NIGHT"                
##  [5] "LARCENY.PICK.POCKET"                 
##  [6] "AUTO.THEFT...PASSENGER.VEHICLE"      
##  [7] "ASSAULT...BATTERY...CITIZEN"         
##  [8] "VANDALISM.MOTOR.VEHICLE"             
##  [9] "WEAPON.POSSESSION.HANDGUN"           
## [10] "SEX.OFFENSE...SEX..ASSAULT"          
## [11] "drug"                                
## [12] "FAMILY.OFFENSE...ABUSE.CHILD"        
## [13] "JUVENILE.RUNAWAY"                    
## [14] "LIQUOR...UNLAWFUL.POSS.UNDER.21"     
## [15] "DISORDERLY.CONDUCT"                  
## [16] "SUICIDE...POISON.OVERDOSE"           
## [17] "LITTERING.TRASH.DUMPING"             
## [18] "TRESPASSING"                         
## [19] "HARASSMENT.STALKING"                 
## [20] "DRIVING.UNDER.THE.INFLUENCE"         
## [21] "FIRE.OTHER"                          
## [22] "POL.INFORMATION"                     
## [23] "LOST.PROPERTY"                       
## [24] "RECOVERED.PROPERTY.MONT..CO."        
## [25] "community_facilities_count"          
## [26] "Number_of_Sales_2014"                
## [27] "Number_of_Crimes_2014"               
## [28] "Type.x"                              
## [29] "IRS_Estimated_Population_2014"       
## [30] "Total_Number_of_Sales_State_Planning"
## [31] "Median_Sales"                        
## [32] "Mean_Sales"                          
## [33] "ML."                                 
## [34] "City"                                
## [35] "List.Price"                          
## [36] "Original.List.Price"                 
## [37] "Close.Price"                         
## [38] "Legal.Subdivision"                   
## [39] "Status"                              
## [40] "Date.Quarter"                        
## [41] "Close.Date"                          
## [42] "DOMM"                                
## [43] "DOMP"                                
## [44] "Baths.All"                           
## [45] "Bedrooms"                            
## [46] "Condo.Coop.Fee"                      
## [47] "HOA.Fee"                             
## [48] "Lot.Sqft"                            
## [49] "Total.Square.Footage"                
## [50] "Type.y"                              
## [51] "Parking"                             
## [52] "Has.Garage"
#remove Condo.Coop.Fee, HOA.Fee, lot.sqft (46-48) because they are not needed and have NAs
#51 and 52 were for Arash's analysis and have been removed
full <- full[,-c(46:48,51,52)]
names(full)
##  [1] "Zip.Code"                            
##  [2] "ROB.FIREARM...STREET"                
##  [3] "AGG.ASSLT.FIREARM.CITIZEN"           
##  [4] "BURG.FORCE.RES.NIGHT"                
##  [5] "LARCENY.PICK.POCKET"                 
##  [6] "AUTO.THEFT...PASSENGER.VEHICLE"      
##  [7] "ASSAULT...BATTERY...CITIZEN"         
##  [8] "VANDALISM.MOTOR.VEHICLE"             
##  [9] "WEAPON.POSSESSION.HANDGUN"           
## [10] "SEX.OFFENSE...SEX..ASSAULT"          
## [11] "drug"                                
## [12] "FAMILY.OFFENSE...ABUSE.CHILD"        
## [13] "JUVENILE.RUNAWAY"                    
## [14] "LIQUOR...UNLAWFUL.POSS.UNDER.21"     
## [15] "DISORDERLY.CONDUCT"                  
## [16] "SUICIDE...POISON.OVERDOSE"           
## [17] "LITTERING.TRASH.DUMPING"             
## [18] "TRESPASSING"                         
## [19] "HARASSMENT.STALKING"                 
## [20] "DRIVING.UNDER.THE.INFLUENCE"         
## [21] "FIRE.OTHER"                          
## [22] "POL.INFORMATION"                     
## [23] "LOST.PROPERTY"                       
## [24] "RECOVERED.PROPERTY.MONT..CO."        
## [25] "community_facilities_count"          
## [26] "Number_of_Sales_2014"                
## [27] "Number_of_Crimes_2014"               
## [28] "Type.x"                              
## [29] "IRS_Estimated_Population_2014"       
## [30] "Total_Number_of_Sales_State_Planning"
## [31] "Median_Sales"                        
## [32] "Mean_Sales"                          
## [33] "ML."                                 
## [34] "City"                                
## [35] "List.Price"                          
## [36] "Original.List.Price"                 
## [37] "Close.Price"                         
## [38] "Legal.Subdivision"                   
## [39] "Status"                              
## [40] "Date.Quarter"                        
## [41] "Close.Date"                          
## [42] "DOMM"                                
## [43] "DOMP"                                
## [44] "Baths.All"                           
## [45] "Bedrooms"                            
## [46] "Total.Square.Footage"                
## [47] "Type.y"
#how many NAs?
row.has.na <- apply(full, 1, function(x){any(is.na(x))})


#get rid of rows with na
full <- full[!row.has.na,]
#only 18 records have been removed

#remove $ from median_sales and mean_sales
full$median_sales_num<-substring(full$Median_Sales, 2)
full$mean_sales_num<-substring(full$Mean_Sales, 2)

#change from character to numeric
full$mean_sales_num <- as.numeric(gsub(",", "", full$mean_sales_num))
full$median_sales_num <- as.numeric(gsub(",","", full$median_sales_num))

#another try at omitting NAs because they seem to still be there
full <- na.omit(full)

#create a dataframe with only numeric and factor data
#get the indexes for all of the columns
names(full)
##  [1] "Zip.Code"                            
##  [2] "ROB.FIREARM...STREET"                
##  [3] "AGG.ASSLT.FIREARM.CITIZEN"           
##  [4] "BURG.FORCE.RES.NIGHT"                
##  [5] "LARCENY.PICK.POCKET"                 
##  [6] "AUTO.THEFT...PASSENGER.VEHICLE"      
##  [7] "ASSAULT...BATTERY...CITIZEN"         
##  [8] "VANDALISM.MOTOR.VEHICLE"             
##  [9] "WEAPON.POSSESSION.HANDGUN"           
## [10] "SEX.OFFENSE...SEX..ASSAULT"          
## [11] "drug"                                
## [12] "FAMILY.OFFENSE...ABUSE.CHILD"        
## [13] "JUVENILE.RUNAWAY"                    
## [14] "LIQUOR...UNLAWFUL.POSS.UNDER.21"     
## [15] "DISORDERLY.CONDUCT"                  
## [16] "SUICIDE...POISON.OVERDOSE"           
## [17] "LITTERING.TRASH.DUMPING"             
## [18] "TRESPASSING"                         
## [19] "HARASSMENT.STALKING"                 
## [20] "DRIVING.UNDER.THE.INFLUENCE"         
## [21] "FIRE.OTHER"                          
## [22] "POL.INFORMATION"                     
## [23] "LOST.PROPERTY"                       
## [24] "RECOVERED.PROPERTY.MONT..CO."        
## [25] "community_facilities_count"          
## [26] "Number_of_Sales_2014"                
## [27] "Number_of_Crimes_2014"               
## [28] "Type.x"                              
## [29] "IRS_Estimated_Population_2014"       
## [30] "Total_Number_of_Sales_State_Planning"
## [31] "Median_Sales"                        
## [32] "Mean_Sales"                          
## [33] "ML."                                 
## [34] "City"                                
## [35] "List.Price"                          
## [36] "Original.List.Price"                 
## [37] "Close.Price"                         
## [38] "Legal.Subdivision"                   
## [39] "Status"                              
## [40] "Date.Quarter"                        
## [41] "Close.Date"                          
## [42] "DOMM"                                
## [43] "DOMP"                                
## [44] "Baths.All"                           
## [45] "Bedrooms"                            
## [46] "Total.Square.Footage"                
## [47] "Type.y"                              
## [48] "median_sales_num"                    
## [49] "mean_sales_num"
#get the classes for all of the columns
lapply(full, class)
## $Zip.Code
## [1] "integer"
## 
## $ROB.FIREARM...STREET
## [1] "numeric"
## 
## $AGG.ASSLT.FIREARM.CITIZEN
## [1] "numeric"
## 
## $BURG.FORCE.RES.NIGHT
## [1] "numeric"
## 
## $LARCENY.PICK.POCKET
## [1] "numeric"
## 
## $AUTO.THEFT...PASSENGER.VEHICLE
## [1] "numeric"
## 
## $ASSAULT...BATTERY...CITIZEN
## [1] "numeric"
## 
## $VANDALISM.MOTOR.VEHICLE
## [1] "numeric"
## 
## $WEAPON.POSSESSION.HANDGUN
## [1] "numeric"
## 
## $SEX.OFFENSE...SEX..ASSAULT
## [1] "numeric"
## 
## $drug
## [1] "numeric"
## 
## $FAMILY.OFFENSE...ABUSE.CHILD
## [1] "numeric"
## 
## $JUVENILE.RUNAWAY
## [1] "numeric"
## 
## $LIQUOR...UNLAWFUL.POSS.UNDER.21
## [1] "numeric"
## 
## $DISORDERLY.CONDUCT
## [1] "numeric"
## 
## $SUICIDE...POISON.OVERDOSE
## [1] "numeric"
## 
## $LITTERING.TRASH.DUMPING
## [1] "numeric"
## 
## $TRESPASSING
## [1] "numeric"
## 
## $HARASSMENT.STALKING
## [1] "numeric"
## 
## $DRIVING.UNDER.THE.INFLUENCE
## [1] "numeric"
## 
## $FIRE.OTHER
## [1] "numeric"
## 
## $POL.INFORMATION
## [1] "numeric"
## 
## $LOST.PROPERTY
## [1] "numeric"
## 
## $RECOVERED.PROPERTY.MONT..CO.
## [1] "numeric"
## 
## $community_facilities_count
## [1] "integer"
## 
## $Number_of_Sales_2014
## [1] "integer"
## 
## $Number_of_Crimes_2014
## [1] "integer"
## 
## $Type.x
## [1] "factor"
## 
## $IRS_Estimated_Population_2014
## [1] "integer"
## 
## $Total_Number_of_Sales_State_Planning
## [1] "integer"
## 
## $Median_Sales
## [1] "factor"
## 
## $Mean_Sales
## [1] "factor"
## 
## $ML.
## [1] "factor"
## 
## $City
## [1] "factor"
## 
## $List.Price
## [1] "numeric"
## 
## $Original.List.Price
## [1] "numeric"
## 
## $Close.Price
## [1] "numeric"
## 
## $Legal.Subdivision
## [1] "factor"
## 
## $Status
## [1] "factor"
## 
## $Date.Quarter
## [1] "integer"
## 
## $Close.Date
## [1] "factor"
## 
## $DOMM
## [1] "integer"
## 
## $DOMP
## [1] "integer"
## 
## $Baths.All
## [1] "integer"
## 
## $Bedrooms
## [1] "integer"
## 
## $Total.Square.Footage
## [1] "integer"
## 
## $Type.y
## [1] "factor"
## 
## $median_sales_num
## [1] "numeric"
## 
## $mean_sales_num
## [1] "numeric"
#reclass columns we will keep to numeric or factor
full$Zip.Code <- factor(full$Zip.Code)




#delete the columns with dollar signs for median and mean housing price, type of sale (type.y) was removed because it is all standard
full <- full[,-c(28,31,32)]

names(full)
##  [1] "Zip.Code"                            
##  [2] "ROB.FIREARM...STREET"                
##  [3] "AGG.ASSLT.FIREARM.CITIZEN"           
##  [4] "BURG.FORCE.RES.NIGHT"                
##  [5] "LARCENY.PICK.POCKET"                 
##  [6] "AUTO.THEFT...PASSENGER.VEHICLE"      
##  [7] "ASSAULT...BATTERY...CITIZEN"         
##  [8] "VANDALISM.MOTOR.VEHICLE"             
##  [9] "WEAPON.POSSESSION.HANDGUN"           
## [10] "SEX.OFFENSE...SEX..ASSAULT"          
## [11] "drug"                                
## [12] "FAMILY.OFFENSE...ABUSE.CHILD"        
## [13] "JUVENILE.RUNAWAY"                    
## [14] "LIQUOR...UNLAWFUL.POSS.UNDER.21"     
## [15] "DISORDERLY.CONDUCT"                  
## [16] "SUICIDE...POISON.OVERDOSE"           
## [17] "LITTERING.TRASH.DUMPING"             
## [18] "TRESPASSING"                         
## [19] "HARASSMENT.STALKING"                 
## [20] "DRIVING.UNDER.THE.INFLUENCE"         
## [21] "FIRE.OTHER"                          
## [22] "POL.INFORMATION"                     
## [23] "LOST.PROPERTY"                       
## [24] "RECOVERED.PROPERTY.MONT..CO."        
## [25] "community_facilities_count"          
## [26] "Number_of_Sales_2014"                
## [27] "Number_of_Crimes_2014"               
## [28] "IRS_Estimated_Population_2014"       
## [29] "Total_Number_of_Sales_State_Planning"
## [30] "ML."                                 
## [31] "City"                                
## [32] "List.Price"                          
## [33] "Original.List.Price"                 
## [34] "Close.Price"                         
## [35] "Legal.Subdivision"                   
## [36] "Status"                              
## [37] "Date.Quarter"                        
## [38] "Close.Date"                          
## [39] "DOMM"                                
## [40] "DOMP"                                
## [41] "Baths.All"                           
## [42] "Bedrooms"                            
## [43] "Total.Square.Footage"                
## [44] "Type.y"                              
## [45] "median_sales_num"                    
## [46] "mean_sales_num"
#full <- full[,-c(33:36, 39)]

#check to see that the classes of the remaining columns are all numeric (including integer), factor
lapply(full, class)
## $Zip.Code
## [1] "factor"
## 
## $ROB.FIREARM...STREET
## [1] "numeric"
## 
## $AGG.ASSLT.FIREARM.CITIZEN
## [1] "numeric"
## 
## $BURG.FORCE.RES.NIGHT
## [1] "numeric"
## 
## $LARCENY.PICK.POCKET
## [1] "numeric"
## 
## $AUTO.THEFT...PASSENGER.VEHICLE
## [1] "numeric"
## 
## $ASSAULT...BATTERY...CITIZEN
## [1] "numeric"
## 
## $VANDALISM.MOTOR.VEHICLE
## [1] "numeric"
## 
## $WEAPON.POSSESSION.HANDGUN
## [1] "numeric"
## 
## $SEX.OFFENSE...SEX..ASSAULT
## [1] "numeric"
## 
## $drug
## [1] "numeric"
## 
## $FAMILY.OFFENSE...ABUSE.CHILD
## [1] "numeric"
## 
## $JUVENILE.RUNAWAY
## [1] "numeric"
## 
## $LIQUOR...UNLAWFUL.POSS.UNDER.21
## [1] "numeric"
## 
## $DISORDERLY.CONDUCT
## [1] "numeric"
## 
## $SUICIDE...POISON.OVERDOSE
## [1] "numeric"
## 
## $LITTERING.TRASH.DUMPING
## [1] "numeric"
## 
## $TRESPASSING
## [1] "numeric"
## 
## $HARASSMENT.STALKING
## [1] "numeric"
## 
## $DRIVING.UNDER.THE.INFLUENCE
## [1] "numeric"
## 
## $FIRE.OTHER
## [1] "numeric"
## 
## $POL.INFORMATION
## [1] "numeric"
## 
## $LOST.PROPERTY
## [1] "numeric"
## 
## $RECOVERED.PROPERTY.MONT..CO.
## [1] "numeric"
## 
## $community_facilities_count
## [1] "integer"
## 
## $Number_of_Sales_2014
## [1] "integer"
## 
## $Number_of_Crimes_2014
## [1] "integer"
## 
## $IRS_Estimated_Population_2014
## [1] "integer"
## 
## $Total_Number_of_Sales_State_Planning
## [1] "integer"
## 
## $ML.
## [1] "factor"
## 
## $City
## [1] "factor"
## 
## $List.Price
## [1] "numeric"
## 
## $Original.List.Price
## [1] "numeric"
## 
## $Close.Price
## [1] "numeric"
## 
## $Legal.Subdivision
## [1] "factor"
## 
## $Status
## [1] "factor"
## 
## $Date.Quarter
## [1] "integer"
## 
## $Close.Date
## [1] "factor"
## 
## $DOMM
## [1] "integer"
## 
## $DOMP
## [1] "integer"
## 
## $Baths.All
## [1] "integer"
## 
## $Bedrooms
## [1] "integer"
## 
## $Total.Square.Footage
## [1] "integer"
## 
## $Type.y
## [1] "factor"
## 
## $median_sales_num
## [1] "numeric"
## 
## $mean_sales_num
## [1] "numeric"
#garage is still logical, not sure if that is ok or not

#split dataset to test, train
index <- 1:nrow(full)
testindex <- sample(index, trunc(length(index)/5))
testset <- full[testindex,]
trainset <- full[-testindex,]

#split for development set
index <- 1:nrow(trainset)
devsetindex <- sample(index, trunc(length(index)/5))
devset <- full[devsetindex,]
trainset <- full[-devsetindex,]

#now we have a testset, devset and trainset

#lm
#See if some of the factors are imporant



#Baseline model
mylogit <- lm(Close.Price ~ drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + BURG.FORCE.RES.NIGHT, data = trainset
)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=229233
## Close.Price ~ drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + 
##     BURG.FORCE.RES.NIGHT
## 
## 
## Step:  AIC=229233
## Close.Price ~ drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN
## 
## 
## Step:  AIC=229233
## Close.Price ~ drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code
## 
## 
## Step:  AIC=229233
## Close.Price ~ drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     Zip.Code
## 
## 
## Step:  AIC=229233
## Close.Price ~ drug + LOST.PROPERTY + Zip.Code
## 
## 
## Step:  AIC=229233
## Close.Price ~ drug + Zip.Code
## 
## 
## Step:  AIC=229233
## Close.Price ~ Zip.Code
## 
##            Df  Sum of Sq        RSS    AIC
## <none>                   7.6993e+14 229233
## - Zip.Code 35 5.1898e+14 1.2889e+15 233856
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + 
##     BURG.FORCE.RES.NIGHT
## 
## Final Model:
## Close.Price ~ Zip.Code
## 
## 
##                          Step Df Deviance Resid. Df   Resid. Dev    AIC
## 1                                              9072 7.699268e+14 229233
## 2      - BURG.FORCE.RES.NIGHT  0    0.000      9072 7.699268e+14 229233
## 3 - AGG.ASSLT.FIREARM.CITIZEN  0    0.000      9072 7.699268e+14 229233
## 4       - LARCENY.PICK.POCKET  0    2.500      9072 7.699268e+14 229233
## 5   - VANDALISM.MOTOR.VEHICLE  0    0.375      9072 7.699268e+14 229233
## 6             - LOST.PROPERTY  0    2.375      9072 7.699268e+14 229233
## 7                      - drug  0    3.875      9072 7.699268e+14 229233
plot(mylogit)

#
#
#
#
#
#predict house price
devset$predicted_close_price1<-predict(mylogit, devset)
## Warning in predict.lm(mylogit, devset): prediction from a rank-deficient
## fit may be misleading
# how far off are the predicted prices
devset$difference1 <- devset$predicted_close_price1 - devset$Close.Price
devset$percent_error1 <- abs(devset$difference1/devset$Close.Price)

#Model 2 - The model from Model 1 final
mylogit <- lm(Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + Number_of_Crimes_2014 + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + BURG.FORCE.RES.NIGHT, data = trainset)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + drug + 
##     LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + BURG.FORCE.RES.NIGHT
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + drug + 
##     LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + AGG.ASSLT.FIREARM.CITIZEN
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + drug + 
##     LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + drug + 
##     LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + drug + 
##     LOST.PROPERTY + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + drug + 
##     Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
##                        Df  Sum of Sq        RSS    AIC
## <none>                               5.6217e+14 226377
## - Date.Quarter          1 2.4396e+11 5.6241e+14 226379
## - Baths.All             1 1.5544e+13 5.7771e+14 226623
## - Bedrooms              1 3.3119e+13 5.9529e+14 226896
## - Total.Square.Footage  1 5.1436e+13 6.1361e+14 227172
## - Zip.Code             35 3.7703e+14 9.3920e+14 230981
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Number_of_Crimes_2014 + drug + 
##     LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + BURG.FORCE.RES.NIGHT
## 
## Final Model:
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
## 
##                           Step Df Deviance Resid. Df   Resid. Dev      AIC
## 1                                               9068 5.621702e+14 226376.6
## 2       - BURG.FORCE.RES.NIGHT  0   0.0000      9068 5.621702e+14 226376.6
## 3  - AGG.ASSLT.FIREARM.CITIZEN  0   0.0000      9068 5.621702e+14 226376.6
## 4        - LARCENY.PICK.POCKET  0   0.1250      9068 5.621702e+14 226376.6
## 5    - VANDALISM.MOTOR.VEHICLE  0   0.3125      9068 5.621702e+14 226376.6
## 6              - LOST.PROPERTY  0   0.0000      9068 5.621702e+14 226376.6
## 7                       - drug  0   0.0625      9068 5.621702e+14 226376.6
## 8      - Number_of_Crimes_2014  0   0.5000      9068 5.621702e+14 226376.6
## 9 - community_facilities_count  0   0.4375      9068 5.621702e+14 226376.6
plot(mylogit)

#
#
#
#
#

#predict house price
devset$predicted_close_price2<-predict(mylogit, devset)
## Warning in predict.lm(mylogit, devset): prediction from a rank-deficient
## fit may be misleading
# how far off are the predicted prices
devset$difference2 <- devset$predicted_close_price2 - devset$Close.Price
devset$percent_error2 <- abs(devset$difference2/devset$Close.Price)

#Model 3
mylogit <- lm(Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + BURG.FORCE.RES.NIGHT, data = trainset
)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + 
##     BURG.FORCE.RES.NIGHT
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + drug + LOST.PROPERTY + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + drug + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
##                        Df  Sum of Sq        RSS    AIC
## <none>                               5.6217e+14 226377
## - Date.Quarter          1 2.4396e+11 5.6241e+14 226379
## - Baths.All             1 1.5544e+13 5.7771e+14 226623
## - Bedrooms              1 3.3119e+13 5.9529e+14 226896
## - Total.Square.Footage  1 5.1436e+13 6.1361e+14 227172
## - Zip.Code             35 3.7703e+14 9.3920e+14 230981
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + 
##     LARCENY.PICK.POCKET + Zip.Code + AGG.ASSLT.FIREARM.CITIZEN + 
##     BURG.FORCE.RES.NIGHT
## 
## Final Model:
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
## 
##                           Step Df Deviance Resid. Df   Resid. Dev      AIC
## 1                                               9068 5.621702e+14 226376.6
## 2       - BURG.FORCE.RES.NIGHT  0   0.0000      9068 5.621702e+14 226376.6
## 3  - AGG.ASSLT.FIREARM.CITIZEN  0   0.0000      9068 5.621702e+14 226376.6
## 4        - LARCENY.PICK.POCKET  0   0.1875      9068 5.621702e+14 226376.6
## 5    - VANDALISM.MOTOR.VEHICLE  0   0.0625      9068 5.621702e+14 226376.6
## 6              - LOST.PROPERTY  0   0.0625      9068 5.621702e+14 226376.6
## 7                       - drug  0   0.4375      9068 5.621702e+14 226376.6
## 8 - community_facilities_count  0   0.4375      9068 5.621702e+14 226376.6
plot(mylogit)

#
#
#
#
#
#predict house price
devset$predicted_close_price3<-predict(mylogit, devset)
## Warning in predict.lm(mylogit, devset): prediction from a rank-deficient
## fit may be misleading
# how far off are the predicted prices
devset$difference3 <- devset$predicted_close_price3 - devset$Close.Price
devset$percent_error3 <- abs(devset$difference3/devset$Close.Price)

#Model 4
mylogit <- lm(Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT, data = trainset)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + 
##     DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + 
##     DRIVING.UNDER.THE.INFLUENCE
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + Bedrooms + Total.Square.Footage + 
##     Baths.All + Date.Quarter + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
##                        Df  Sum of Sq        RSS    AIC
## <none>                               5.6217e+14 226377
## - Date.Quarter          1 2.4396e+11 5.6241e+14 226379
## - Baths.All             1 1.5544e+13 5.7771e+14 226623
## - Bedrooms              1 3.3119e+13 5.9529e+14 226896
## - Total.Square.Footage  1 5.1436e+13 6.1361e+14 227172
## - Zip.Code             35 3.7703e+14 9.3920e+14 230981
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + 
##     DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
## Final Model:
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
## 
##                             Step Df Deviance Resid. Df   Resid. Dev
## 1                                                 9068 5.621702e+14
## 2         - BURG.FORCE.RES.NIGHT  0   0.0000      9068 5.621702e+14
## 3  - DRIVING.UNDER.THE.INFLUENCE  0   0.0000      9068 5.621702e+14
## 4              - POL.INFORMATION  0   0.0000      9068 5.621702e+14
## 5  - ASSAULT...BATTERY...CITIZEN  0   0.0000      9068 5.621702e+14
## 6          - LARCENY.PICK.POCKET  0   1.1875      9068 5.621702e+14
## 7      - VANDALISM.MOTOR.VEHICLE  0  18.0000      9068 5.621702e+14
## 8                - LOST.PROPERTY  0  13.1875      9068 5.621702e+14
## 9                         - drug  0   4.8125      9068 5.621702e+14
## 10  - community_facilities_count  0   0.9375      9068 5.621702e+14
## 11              - mean_sales_num  0   0.3750      9068 5.621702e+14
## 12            - median_sales_num  0   0.5000      9068 5.621702e+14
##         AIC
## 1  226376.6
## 2  226376.6
## 3  226376.6
## 4  226376.6
## 5  226376.6
## 6  226376.6
## 7  226376.6
## 8  226376.6
## 9  226376.6
## 10 226376.6
## 11 226376.6
## 12 226376.6
plot(mylogit)

#
#
#
#
#
#predict house price
devset$predicted_close_price4<-predict(mylogit, devset)
## Warning in predict.lm(mylogit, devset): prediction from a rank-deficient
## fit may be misleading
# how far off are the predicted prices
devset$difference4 <- devset$predicted_close_price4 - devset$Close.Price
devset$percent_error4 <- abs(devset$difference4/devset$Close.Price)

#Model 5
mylogit <- lm(Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT, data = trainset)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + 
##     DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + 
##     DRIVING.UNDER.THE.INFLUENCE
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ median_sales_num + Bedrooms + Total.Square.Footage + 
##     Baths.All + Date.Quarter + Zip.Code
## 
## 
## Step:  AIC=226376.6
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
##                        Df  Sum of Sq        RSS    AIC
## <none>                               5.6217e+14 226377
## - Date.Quarter          1 2.4396e+11 5.6241e+14 226379
## - Baths.All             1 1.5544e+13 5.7771e+14 226623
## - Bedrooms              1 3.3119e+13 5.9529e+14 226896
## - Total.Square.Footage  1 5.1436e+13 6.1361e+14 227172
## - Zip.Code             35 3.7703e+14 9.3920e+14 230981
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + LOST.PROPERTY + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + 
##     Zip.Code + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + 
##     DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
## Final Model:
## Close.Price ~ Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + 
##     Zip.Code
## 
## 
##                             Step Df Deviance Resid. Df   Resid. Dev
## 1                                                 9068 5.621702e+14
## 2         - BURG.FORCE.RES.NIGHT  0   0.0000      9068 5.621702e+14
## 3  - DRIVING.UNDER.THE.INFLUENCE  0   0.0000      9068 5.621702e+14
## 4              - POL.INFORMATION  0   0.0000      9068 5.621702e+14
## 5  - ASSAULT...BATTERY...CITIZEN  0   0.0000      9068 5.621702e+14
## 6          - LARCENY.PICK.POCKET  0   1.1875      9068 5.621702e+14
## 7      - VANDALISM.MOTOR.VEHICLE  0  18.0000      9068 5.621702e+14
## 8                - LOST.PROPERTY  0  13.1875      9068 5.621702e+14
## 9                         - drug  0   4.8125      9068 5.621702e+14
## 10  - community_facilities_count  0   0.9375      9068 5.621702e+14
## 11              - mean_sales_num  0   0.3750      9068 5.621702e+14
## 12            - median_sales_num  0   0.5000      9068 5.621702e+14
##         AIC
## 1  226376.6
## 2  226376.6
## 3  226376.6
## 4  226376.6
## 5  226376.6
## 6  226376.6
## 7  226376.6
## 8  226376.6
## 9  226376.6
## 10 226376.6
## 11 226376.6
## 12 226376.6
plot(mylogit)

#
#
#
#
#
#predict housing price
devset$predicted_close_price5<-predict(mylogit, devset)
## Warning in predict.lm(mylogit, devset): prediction from a rank-deficient
## fit may be misleading
# how far off are the predicted prices
devset$difference5 <- devset$predicted_close_price5 - devset$Close.Price
devset$percent_error5 <- abs(devset$difference5/devset$Close.Price)

#Model 9 - Final Linear Model
#linear model with 3 factors and 4 identified in PCA, AIC 227765.1 
#This comes here because some columns were removed for PCA, below.

names(trainset)
##  [1] "Zip.Code"                            
##  [2] "ROB.FIREARM...STREET"                
##  [3] "AGG.ASSLT.FIREARM.CITIZEN"           
##  [4] "BURG.FORCE.RES.NIGHT"                
##  [5] "LARCENY.PICK.POCKET"                 
##  [6] "AUTO.THEFT...PASSENGER.VEHICLE"      
##  [7] "ASSAULT...BATTERY...CITIZEN"         
##  [8] "VANDALISM.MOTOR.VEHICLE"             
##  [9] "WEAPON.POSSESSION.HANDGUN"           
## [10] "SEX.OFFENSE...SEX..ASSAULT"          
## [11] "drug"                                
## [12] "FAMILY.OFFENSE...ABUSE.CHILD"        
## [13] "JUVENILE.RUNAWAY"                    
## [14] "LIQUOR...UNLAWFUL.POSS.UNDER.21"     
## [15] "DISORDERLY.CONDUCT"                  
## [16] "SUICIDE...POISON.OVERDOSE"           
## [17] "LITTERING.TRASH.DUMPING"             
## [18] "TRESPASSING"                         
## [19] "HARASSMENT.STALKING"                 
## [20] "DRIVING.UNDER.THE.INFLUENCE"         
## [21] "FIRE.OTHER"                          
## [22] "POL.INFORMATION"                     
## [23] "LOST.PROPERTY"                       
## [24] "RECOVERED.PROPERTY.MONT..CO."        
## [25] "community_facilities_count"          
## [26] "Number_of_Sales_2014"                
## [27] "Number_of_Crimes_2014"               
## [28] "IRS_Estimated_Population_2014"       
## [29] "Total_Number_of_Sales_State_Planning"
## [30] "ML."                                 
## [31] "City"                                
## [32] "List.Price"                          
## [33] "Original.List.Price"                 
## [34] "Close.Price"                         
## [35] "Legal.Subdivision"                   
## [36] "Status"                              
## [37] "Date.Quarter"                        
## [38] "Close.Date"                          
## [39] "DOMM"                                
## [40] "DOMP"                                
## [41] "Baths.All"                           
## [42] "Bedrooms"                            
## [43] "Total.Square.Footage"                
## [44] "Type.y"                              
## [45] "median_sales_num"                    
## [46] "mean_sales_num"
mylogit <- lm(
  as.formula(paste(colnames(trainset)[34], "~",
                   paste(colnames(trainset)[c(2, 17, 4, 3, 41:43)], collapse = "+"),
                   sep = ""
  )),
  data=trainset
)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=229396.2
## Close.Price ~ ROB.FIREARM...STREET + LITTERING.TRASH.DUMPING + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN + Baths.All + 
##     Bedrooms + Total.Square.Footage
## 
##                             Df  Sum of Sq        RSS    AIC
## <none>                                    7.8868e+14 229396
## - BURG.FORCE.RES.NIGHT       1 1.1865e+12 7.8987e+14 229408
## - LITTERING.TRASH.DUMPING    1 3.2671e+12 7.9195e+14 229432
## - ROB.FIREARM...STREET       1 1.0681e+13 7.9936e+14 229517
## - Baths.All                  1 2.2729e+13 8.1141e+14 229653
## - Bedrooms                   1 4.0714e+13 8.2939e+14 229853
## - Total.Square.Footage       1 5.3457e+13 8.4214e+14 229991
## - AGG.ASSLT.FIREARM.CITIZEN  1 1.1452e+14 9.0319e+14 230629
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ ROB.FIREARM...STREET + LITTERING.TRASH.DUMPING + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN + Baths.All + 
##     Bedrooms + Total.Square.Footage
## 
## Final Model:
## Close.Price ~ ROB.FIREARM...STREET + LITTERING.TRASH.DUMPING + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN + Baths.All + 
##     Bedrooms + Total.Square.Footage
## 
## 
##   Step Df Deviance Resid. Df   Resid. Dev      AIC
## 1                       9100 7.886793e+14 229396.2
plot(mylogit)

#
#
#
#
#
#predict housing price
devset$predicted_close_price9<-predict(mylogit, devset)

# how far off are the predicted prices
devset$difference9 <- devset$predicted_close_price9 - devset$Close.Price
devset$percent_error9 <- abs(devset$difference9/devset$Close.Price)

#linear model 6
mylogit <- lm(Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + drug + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + ASSAULT...BATTERY...CITIZEN + POL.INFORMATION + DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT, data = trainset)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=226508.6
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + ASSAULT...BATTERY...CITIZEN + 
##     POL.INFORMATION + DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
##                               Df  Sum of Sq        RSS    AIC
## - community_facilities_count   1 2.3350e+10 5.7354e+14 226507
## - POL.INFORMATION              1 4.6757e+10 5.7356e+14 226507
## - median_sales_num             1 1.2125e+11 5.7364e+14 226509
## - BURG.FORCE.RES.NIGHT         1 1.2138e+11 5.7364e+14 226509
## <none>                                      5.7352e+14 226509
## - drug                         1 1.4201e+11 5.7366e+14 226509
## - Date.Quarter                 1 2.9637e+11 5.7381e+14 226511
## - ASSAULT...BATTERY...CITIZEN  1 3.6121e+11 5.7388e+14 226512
## - VANDALISM.MOTOR.VEHICLE      1 9.9670e+11 5.7451e+14 226522
## - DRIVING.UNDER.THE.INFLUENCE  1 1.3390e+12 5.7486e+14 226528
## - LARCENY.PICK.POCKET          1 3.8860e+12 5.7740e+14 226568
## - mean_sales_num               1 7.6426e+12 5.8116e+14 226627
## - Baths.All                    1 1.5000e+13 5.8852e+14 226742
## - Bedrooms                     1 3.5553e+13 6.0907e+14 227054
## - Total.Square.Footage         1 4.8804e+13 6.2232e+14 227250
## 
## Step:  AIC=226507
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + drug + 
##     VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + ASSAULT...BATTERY...CITIZEN + 
##     POL.INFORMATION + DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
##                               Df  Sum of Sq        RSS    AIC
## - POL.INFORMATION              1 4.5254e+10 5.7359e+14 226506
## - BURG.FORCE.RES.NIGHT         1 1.0051e+11 5.7364e+14 226507
## <none>                                      5.7354e+14 226507
## - drug                         1 1.3477e+11 5.7368e+14 226507
## - median_sales_num             1 1.4371e+11 5.7369e+14 226507
## + community_facilities_count   1 2.3350e+10 5.7352e+14 226509
## - Date.Quarter                 1 2.9714e+11 5.7384e+14 226510
## - ASSAULT...BATTERY...CITIZEN  1 3.3803e+11 5.7388e+14 226510
## - VANDALISM.MOTOR.VEHICLE      1 9.7958e+11 5.7452e+14 226521
## - DRIVING.UNDER.THE.INFLUENCE  1 1.3613e+12 5.7490e+14 226527
## - LARCENY.PICK.POCKET          1 3.9003e+12 5.7744e+14 226567
## - mean_sales_num               1 8.1235e+12 5.8166e+14 226633
## - Baths.All                    1 1.4977e+13 5.8852e+14 226740
## - Bedrooms                     1 3.5645e+13 6.0919e+14 227054
## - Total.Square.Footage         1 4.8884e+13 6.2243e+14 227250
## 
## Step:  AIC=226505.7
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + drug + 
##     VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + ASSAULT...BATTERY...CITIZEN + 
##     DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
##                               Df  Sum of Sq        RSS    AIC
## - BURG.FORCE.RES.NIGHT         1 8.7713e+10 5.7367e+14 226505
## <none>                                      5.7359e+14 226506
## + POL.INFORMATION              1 4.5254e+10 5.7354e+14 226507
## + community_facilities_count   1 2.1847e+10 5.7356e+14 226507
## - drug                         1 2.7132e+11 5.7386e+14 226508
## - Date.Quarter                 1 2.9795e+11 5.7388e+14 226508
## - ASSAULT...BATTERY...CITIZEN  1 3.1027e+11 5.7390e+14 226509
## - median_sales_num             1 3.3385e+11 5.7392e+14 226509
## - VANDALISM.MOTOR.VEHICLE      1 9.8713e+11 5.7457e+14 226519
## - DRIVING.UNDER.THE.INFLUENCE  1 1.3716e+12 5.7496e+14 226525
## - LARCENY.PICK.POCKET          1 3.8629e+12 5.7745e+14 226565
## - mean_sales_num               1 1.1254e+13 5.8484e+14 226681
## - Baths.All                    1 1.4935e+13 5.8852e+14 226738
## - Bedrooms                     1 3.5943e+13 6.0953e+14 227057
## - Total.Square.Footage         1 4.8889e+13 6.2248e+14 227249
## 
## Step:  AIC=226505.1
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + drug + 
##     VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + ASSAULT...BATTERY...CITIZEN + 
##     DRIVING.UNDER.THE.INFLUENCE
## 
##                               Df  Sum of Sq        RSS    AIC
## <none>                                      5.7367e+14 226505
## + BURG.FORCE.RES.NIGHT         1 8.7713e+10 5.7359e+14 226506
## + POL.INFORMATION              1 3.2460e+10 5.7364e+14 226507
## - ASSAULT...BATTERY...CITIZEN  1 2.4462e+11 5.7392e+14 226507
## + community_facilities_count   1 2.6581e+09 5.7367e+14 226507
## - median_sales_num             1 2.6993e+11 5.7394e+14 226507
## - Date.Quarter                 1 2.9864e+11 5.7397e+14 226508
## - drug                         1 3.5537e+11 5.7403e+14 226509
## - VANDALISM.MOTOR.VEHICLE      1 1.2795e+12 5.7495e+14 226523
## - DRIVING.UNDER.THE.INFLUENCE  1 1.3321e+12 5.7501e+14 226524
## - LARCENY.PICK.POCKET          1 4.2399e+12 5.7791e+14 226570
## - mean_sales_num               1 1.1313e+13 5.8499e+14 226681
## - Baths.All                    1 1.4858e+13 5.8853e+14 226736
## - Bedrooms                     1 3.6707e+13 6.1038e+14 227068
## - Total.Square.Footage         1 4.8813e+13 6.2249e+14 227247
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + community_facilities_count + 
##     drug + VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + ASSAULT...BATTERY...CITIZEN + 
##     POL.INFORMATION + DRIVING.UNDER.THE.INFLUENCE + BURG.FORCE.RES.NIGHT
## 
## Final Model:
## Close.Price ~ median_sales_num + mean_sales_num + Bedrooms + 
##     Total.Square.Footage + Baths.All + Date.Quarter + drug + 
##     VANDALISM.MOTOR.VEHICLE + LARCENY.PICK.POCKET + ASSAULT...BATTERY...CITIZEN + 
##     DRIVING.UNDER.THE.INFLUENCE
## 
## 
##                           Step Df    Deviance Resid. Df   Resid. Dev
## 1                                                  9093 5.735180e+14
## 2 - community_facilities_count  1 23350406474      9094 5.735414e+14
## 3            - POL.INFORMATION  1 45254318174      9095 5.735866e+14
## 4       - BURG.FORCE.RES.NIGHT  1 87712518684      9096 5.736744e+14
##        AIC
## 1 226508.6
## 2 226507.0
## 3 226505.7
## 4 226505.1
plot(mylogit)

#
#
#
#
#
#predict house price
devset$predicted_close_price6<-predict(mylogit, devset)

# how far off are the predicted prices
devset$difference6 <- devset$predicted_close_price6 - devset$Close.Price
devset$percent_error6 <- abs(devset$difference6/devset$Close.Price)

#linear model with 3 factors

#PCA
#In order to do PCA, factor columns need to be removed, but they are needed for the lm, above
#rattle was used here to identify the factor columns, quickly
#it is commented out for the RMD
names(full)
##  [1] "Zip.Code"                            
##  [2] "ROB.FIREARM...STREET"                
##  [3] "AGG.ASSLT.FIREARM.CITIZEN"           
##  [4] "BURG.FORCE.RES.NIGHT"                
##  [5] "LARCENY.PICK.POCKET"                 
##  [6] "AUTO.THEFT...PASSENGER.VEHICLE"      
##  [7] "ASSAULT...BATTERY...CITIZEN"         
##  [8] "VANDALISM.MOTOR.VEHICLE"             
##  [9] "WEAPON.POSSESSION.HANDGUN"           
## [10] "SEX.OFFENSE...SEX..ASSAULT"          
## [11] "drug"                                
## [12] "FAMILY.OFFENSE...ABUSE.CHILD"        
## [13] "JUVENILE.RUNAWAY"                    
## [14] "LIQUOR...UNLAWFUL.POSS.UNDER.21"     
## [15] "DISORDERLY.CONDUCT"                  
## [16] "SUICIDE...POISON.OVERDOSE"           
## [17] "LITTERING.TRASH.DUMPING"             
## [18] "TRESPASSING"                         
## [19] "HARASSMENT.STALKING"                 
## [20] "DRIVING.UNDER.THE.INFLUENCE"         
## [21] "FIRE.OTHER"                          
## [22] "POL.INFORMATION"                     
## [23] "LOST.PROPERTY"                       
## [24] "RECOVERED.PROPERTY.MONT..CO."        
## [25] "community_facilities_count"          
## [26] "Number_of_Sales_2014"                
## [27] "Number_of_Crimes_2014"               
## [28] "IRS_Estimated_Population_2014"       
## [29] "Total_Number_of_Sales_State_Planning"
## [30] "ML."                                 
## [31] "City"                                
## [32] "List.Price"                          
## [33] "Original.List.Price"                 
## [34] "Close.Price"                         
## [35] "Legal.Subdivision"                   
## [36] "Status"                              
## [37] "Date.Quarter"                        
## [38] "Close.Date"                          
## [39] "DOMM"                                
## [40] "DOMP"                                
## [41] "Baths.All"                           
## [42] "Bedrooms"                            
## [43] "Total.Square.Footage"                
## [44] "Type.y"                              
## [45] "median_sales_num"                    
## [46] "mean_sales_num"
#remove columns that are factors (because something isn't working in PCA)
full1 <- full [, -c(1, 30, 31,35,36,38,44)]

#split dataset to test, train
index <- 1:nrow(full1)
testindex <- sample(index, trunc(length(index)/5))
testset <- full1[testindex,]
trainset <- full1[-testindex,]

#split for development set
index <- 1:nrow(trainset)
devsetindex <- sample(index, trunc(length(index)/5))
devset2 <- full1[devsetindex,]
trainset <- full1[-devsetindex,]


#PCA
#http://www.statmethods.net/advstats/factor.html
#run the Principle Component Analysis
fit <- princomp(trainset, cor=TRUE)
summary(fit) # print variance accounted for 
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4
## Standard deviation     3.6347734 2.2972031 1.92555828 1.51413278
## Proportion of Variance 0.3387584 0.1353113 0.09507115 0.05878457
## Cumulative Proportion  0.3387584 0.4740697 0.56914089 0.62792545
##                            Comp.5     Comp.6     Comp.7     Comp.8
## Standard deviation     1.40646368 1.29780863 1.10056719 1.04303378
## Proportion of Variance 0.05072154 0.04318737 0.03105764 0.02789537
## Cumulative Proportion  0.67864699 0.72183436 0.75289200 0.78078737
##                            Comp.9    Comp.10    Comp.11    Comp.12
## Standard deviation     1.00355642 0.98796539 0.94429259 0.91201272
## Proportion of Variance 0.02582373 0.02502758 0.02286381 0.02132736
## Cumulative Proportion  0.80661110 0.83163868 0.85450249 0.87582986
##                           Comp.13    Comp.14    Comp.15    Comp.16
## Standard deviation     0.87484682 0.80456184 0.79970691 0.66871176
## Proportion of Variance 0.01962454 0.01659794 0.01639823 0.01146604
## Cumulative Proportion  0.89545439 0.91205234 0.92845057 0.93991661
##                            Comp.17     Comp.18     Comp.19     Comp.20
## Standard deviation     0.591124046 0.545543644 0.518745719 0.468109715
## Proportion of Variance 0.008959683 0.007631227 0.006899926 0.005618633
## Cumulative Proportion  0.948876290 0.956507517 0.963407443 0.969026077
##                            Comp.21     Comp.22     Comp.23     Comp.24
## Standard deviation     0.444012954 0.436325690 0.421277196 0.376892053
## Proportion of Variance 0.005055064 0.004881541 0.004550628 0.003642247
## Cumulative Proportion  0.974081141 0.978962682 0.983513310 0.987155556
##                            Comp.25    Comp.26     Comp.27     Comp.28
## Standard deviation     0.336366492 0.31558132 0.289889123 0.248268529
## Proportion of Variance 0.002901088 0.00255363 0.002154762 0.001580443
## Cumulative Proportion  0.990056644 0.99261027 0.994765036 0.996345478
##                             Comp.29      Comp.30      Comp.31      Comp.32
## Standard deviation     0.1972720300 0.1849695961 0.1462771888 0.1145508830
## Proportion of Variance 0.0009978527 0.0008772757 0.0005486414 0.0003364591
## Cumulative Proportion  0.9973433310 0.9982206066 0.9987692481 0.9991057072
##                             Comp.33      Comp.34      Comp.35      Comp.36
## Standard deviation     0.1021310436 0.0907729681 0.0818125483 0.0652921575
## Proportion of Variance 0.0002674551 0.0002112752 0.0001716229 0.0001093094
## Cumulative Proportion  0.9993731623 0.9995844375 0.9997560604 0.9998653698
##                             Comp.37      Comp.38      Comp.39
## Standard deviation     0.0552646682 4.248429e-02 1.978587e-02
## Proportion of Variance 0.0000783124 4.627988e-05 1.003797e-05
## Cumulative Proportion  0.9999436822 9.999900e-01 1.000000e+00
loadings(fit) # pc loadings 
## 
## Loadings:
##                                      Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## ROB.FIREARM...STREET                 -0.234                0.107  0.150
## AGG.ASSLT.FIREARM.CITIZEN            -0.242                0.158  0.157
## BURG.FORCE.RES.NIGHT                 -0.181 -0.207 -0.134              
## LARCENY.PICK.POCKET                  -0.187 -0.237        -0.127       
## AUTO.THEFT...PASSENGER.VEHICLE       -0.241                            
## ASSAULT...BATTERY...CITIZEN          -0.260                      -0.102
## VANDALISM.MOTOR.VEHICLE              -0.211                      -0.217
## WEAPON.POSSESSION.HANDGUN            -0.200        -0.130  0.140       
## SEX.OFFENSE...SEX..ASSAULT           -0.157                      -0.355
## drug                                 -0.238                            
## FAMILY.OFFENSE...ABUSE.CHILD         -0.176  0.135 -0.118  0.233  0.167
## JUVENILE.RUNAWAY                     -0.171  0.181         0.143 -0.255
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.215 -0.103                0.265
## DISORDERLY.CONDUCT                   -0.208 -0.183                0.169
## SUICIDE...POISON.OVERDOSE                          -0.131        -0.344
## LITTERING.TRASH.DUMPING              -0.188 -0.131                0.234
## TRESPASSING                          -0.213 -0.144                0.234
## HARASSMENT.STALKING                         -0.271  0.117 -0.111  0.208
## DRIVING.UNDER.THE.INFLUENCE          -0.153               -0.149 -0.126
## FIRE.OTHER                           -0.215                            
## POL.INFORMATION                      -0.144 -0.121  0.120 -0.118       
## LOST.PROPERTY                        -0.113 -0.172  0.139 -0.240 -0.327
## RECOVERED.PROPERTY.MONT..CO.         -0.159 -0.178        -0.138 -0.305
## community_facilities_count                   0.168  0.333              
## Number_of_Sales_2014                         0.193  0.436              
## Number_of_Crimes_2014                -0.216         0.287              
## IRS_Estimated_Population_2014                0.219  0.405              
## Total_Number_of_Sales_State_Planning         0.258  0.393              
## List.Price                            0.118 -0.270  0.188  0.251       
## Original.List.Price                   0.118 -0.270  0.188  0.261       
## Close.Price                           0.118 -0.272  0.185  0.243       
## Date.Quarter                                                           
## DOMM                                                       0.290 -0.131
## DOMP                                                       0.309 -0.131
## Baths.All                                                  0.336       
## Bedrooms                                    -0.112         0.359       
## Total.Square.Footage                                                   
## median_sales_num                      0.162 -0.271  0.140 -0.187       
## mean_sales_num                        0.171 -0.258  0.140 -0.178       
##                                      Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## ROB.FIREARM...STREET                        -0.257 -0.143               
## AGG.ASSLT.FIREARM.CITIZEN                                               
## BURG.FORCE.RES.NIGHT                        -0.128 -0.103               
## LARCENY.PICK.POCKET                                                     
## AUTO.THEFT...PASSENGER.VEHICLE              -0.236 -0.110               
## ASSAULT...BATTERY...CITIZEN                                             
## VANDALISM.MOTOR.VEHICLE                     -0.260                      
## WEAPON.POSSESSION.HANDGUN                                               
## SEX.OFFENSE...SEX..ASSAULT                   0.202                      
## drug                                         0.151                      
## FAMILY.OFFENSE...ABUSE.CHILD                 0.135  0.203               
## JUVENILE.RUNAWAY                     -0.121 -0.294 -0.135               
## LIQUOR...UNLAWFUL.POSS.UNDER.21              0.161  0.161               
## DISORDERLY.CONDUCT                                  0.158               
## SUICIDE...POISON.OVERDOSE                           0.472  0.207        
## LITTERING.TRASH.DUMPING                                                 
## TRESPASSING                                  0.112                      
## HARASSMENT.STALKING                                                     
## DRIVING.UNDER.THE.INFLUENCE                  0.216  0.264               
## FIRE.OTHER                                  -0.211 -0.194               
## POL.INFORMATION                              0.392 -0.239 -0.100 -0.119 
## LOST.PROPERTY                                0.224 -0.242               
## RECOVERED.PROPERTY.MONT..CO.                       -0.140               
## community_facilities_count                          0.285  0.138  0.105 
## Number_of_Sales_2014                                                    
## Number_of_Crimes_2014                                                   
## IRS_Estimated_Population_2014                                           
## Total_Number_of_Sales_State_Planning                                    
## List.Price                           -0.124 -0.121  0.138               
## Original.List.Price                         -0.121  0.134               
## Close.Price                          -0.138 -0.120  0.137               
## Date.Quarter                                       -0.353  0.469  0.797 
## DOMM                                  0.616                             
## DOMP                                  0.600                             
## Baths.All                            -0.245  0.326 -0.190               
## Bedrooms                             -0.273  0.318 -0.162               
## Total.Square.Footage                                0.124 -0.815  0.537 
## median_sales_num                                                        
## mean_sales_num                              -0.102                      
##                                      Comp.11 Comp.12 Comp.13 Comp.14
## ROB.FIREARM...STREET                                          0.165 
## AGG.ASSLT.FIREARM.CITIZEN             0.105          -0.107         
## BURG.FORCE.RES.NIGHT                         -0.144   0.209  -0.147 
## LARCENY.PICK.POCKET                  -0.149           0.129         
## AUTO.THEFT...PASSENGER.VEHICLE                                0.250 
## ASSAULT...BATTERY...CITIZEN                                         
## VANDALISM.MOTOR.VEHICLE              -0.165                   0.320 
## WEAPON.POSSESSION.HANDGUN             0.385          -0.179  -0.145 
## SEX.OFFENSE...SEX..ASSAULT                    0.126   0.412   0.284 
## drug                                          0.145   0.105   0.179 
## FAMILY.OFFENSE...ABUSE.CHILD          0.225                         
## JUVENILE.RUNAWAY                                             -0.115 
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.111                  -0.122 
## DISORDERLY.CONDUCT                   -0.199          -0.181         
## SUICIDE...POISON.OVERDOSE             0.209  -0.215  -0.418   0.113 
## LITTERING.TRASH.DUMPING               0.133  -0.173   0.279  -0.385 
## TRESPASSING                                          -0.105         
## HARASSMENT.STALKING                   0.300  -0.202           0.441 
## DRIVING.UNDER.THE.INFLUENCE          -0.558                  -0.140 
## FIRE.OTHER                                           -0.261         
## POL.INFORMATION                       0.209   0.301           0.146 
## LOST.PROPERTY                                 0.120          -0.175 
## RECOVERED.PROPERTY.MONT..CO.          0.215  -0.125  -0.218  -0.320 
## community_facilities_count            0.156  -0.296   0.391  -0.144 
## Number_of_Sales_2014                                 -0.211         
## Number_of_Crimes_2014                                               
## IRS_Estimated_Population_2014                                       
## Total_Number_of_Sales_State_Planning                 -0.211         
## List.Price                                    0.298                 
## Original.List.Price                           0.285                 
## Close.Price                                   0.298                 
## Date.Quarter                                                        
## DOMM                                         -0.117                 
## DOMP                                                                
## Baths.All                            -0.159  -0.325  -0.139   0.127 
## Bedrooms                             -0.125  -0.344                 
## Total.Square.Footage                  0.137                         
## median_sales_num                             -0.148                 
## mean_sales_num                               -0.178                 
##                                      Comp.15 Comp.16 Comp.17 Comp.18
## ROB.FIREARM...STREET                 -0.102                         
## AGG.ASSLT.FIREARM.CITIZEN                                           
## BURG.FORCE.RES.NIGHT                          0.487                 
## LARCENY.PICK.POCKET                   0.121  -0.187          -0.335 
## AUTO.THEFT...PASSENGER.VEHICLE                0.187          -0.209 
## ASSAULT...BATTERY...CITIZEN          -0.127                   0.121 
## VANDALISM.MOTOR.VEHICLE              -0.187                   0.189 
## WEAPON.POSSESSION.HANDGUN             0.364                  -0.208 
## SEX.OFFENSE...SEX..ASSAULT                   -0.286   0.176  -0.289 
## drug                                                 -0.195   0.255 
## FAMILY.OFFENSE...ABUSE.CHILD                  0.230          -0.145 
## JUVENILE.RUNAWAY                             -0.183                 
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.250  -0.169                 
## DISORDERLY.CONDUCT                                                  
## SUICIDE...POISON.OVERDOSE            -0.230   0.105          -0.138 
## LITTERING.TRASH.DUMPING              -0.134          -0.101         
## TRESPASSING                          -0.284  -0.350           0.124 
## HARASSMENT.STALKING                   0.408                   0.424 
## DRIVING.UNDER.THE.INFLUENCE           0.311   0.247           0.131 
## FIRE.OTHER                            0.441                  -0.275 
## POL.INFORMATION                      -0.200   0.427                 
## LOST.PROPERTY                         0.119  -0.213                 
## RECOVERED.PROPERTY.MONT..CO.                                  0.308 
## community_facilities_count            0.103                         
## Number_of_Sales_2014                                                
## Number_of_Crimes_2014                                        -0.155 
## IRS_Estimated_Population_2014                 0.152                 
## Total_Number_of_Sales_State_Planning                                
## List.Price                                                          
## Original.List.Price                                                 
## Close.Price                                                         
## Date.Quarter                                                        
## DOMM                                                                
## DOMP                                                                
## Baths.All                                            -0.642         
## Bedrooms                                              0.670         
## Total.Square.Footage                                                
## median_sales_num                     -0.117                  -0.241 
## mean_sales_num                                               -0.221 
##                                      Comp.19 Comp.20 Comp.21 Comp.22
## ROB.FIREARM...STREET                 -0.107                  -0.111 
## AGG.ASSLT.FIREARM.CITIZEN                                    -0.158 
## BURG.FORCE.RES.NIGHT                  0.276  -0.192   0.115         
## LARCENY.PICK.POCKET                  -0.227   0.199           0.103 
## AUTO.THEFT...PASSENGER.VEHICLE                                0.141 
## ASSAULT...BATTERY...CITIZEN                                  -0.205 
## VANDALISM.MOTOR.VEHICLE              -0.103  -0.125                 
## WEAPON.POSSESSION.HANDGUN            -0.106  -0.253                 
## SEX.OFFENSE...SEX..ASSAULT            0.368  -0.203   0.104         
## drug                                 -0.537  -0.128          -0.214 
## FAMILY.OFFENSE...ABUSE.CHILD                  0.266   0.153  -0.395 
## JUVENILE.RUNAWAY                              0.326                 
## LIQUOR...UNLAWFUL.POSS.UNDER.21       0.217                         
## DISORDERLY.CONDUCT                    0.213   0.201  -0.119         
## SUICIDE...POISON.OVERDOSE            -0.187                   0.188 
## LITTERING.TRASH.DUMPING              -0.150  -0.405           0.390 
## TRESPASSING                                                         
## HARASSMENT.STALKING                   0.165                         
## DRIVING.UNDER.THE.INFLUENCE                  -0.136                 
## FIRE.OTHER                                                          
## POL.INFORMATION                               0.252  -0.149   0.265 
## LOST.PROPERTY                                                       
## RECOVERED.PROPERTY.MONT..CO.          0.193   0.192          -0.124 
## community_facilities_count                    0.298  -0.147   0.112 
## Number_of_Sales_2014                         -0.230                 
## Number_of_Crimes_2014                         0.142                 
## IRS_Estimated_Population_2014                -0.119          -0.158 
## Total_Number_of_Sales_State_Planning         -0.143           0.131 
## List.Price                                                          
## Original.List.Price                                                 
## Close.Price                                                         
## Date.Quarter                                                        
## DOMM                                          0.153   0.638   0.190 
## DOMP                                         -0.153  -0.651  -0.201 
## Baths.All                             0.235                         
## Bedrooms                             -0.232                         
## Total.Square.Footage                                                
## median_sales_num                     -0.113                  -0.324 
## mean_sales_num                       -0.146                  -0.357 
##                                      Comp.23 Comp.24 Comp.25 Comp.26
## ROB.FIREARM...STREET                  0.131  -0.194   0.141  -0.183 
## AGG.ASSLT.FIREARM.CITIZEN            -0.165   0.132   0.298   0.166 
## BURG.FORCE.RES.NIGHT                                  0.177   0.193 
## LARCENY.PICK.POCKET                   0.227           0.409   0.380 
## AUTO.THEFT...PASSENGER.VEHICLE               -0.174          -0.268 
## ASSAULT...BATTERY...CITIZEN                   0.143  -0.129   0.394 
## VANDALISM.MOTOR.VEHICLE                       0.545          -0.156 
## WEAPON.POSSESSION.HANDGUN            -0.356           0.227         
## SEX.OFFENSE...SEX..ASSAULT                   -0.162  -0.147  -0.107 
## drug                                 -0.192  -0.131                 
## FAMILY.OFFENSE...ABUSE.CHILD          0.478          -0.196         
## JUVENILE.RUNAWAY                     -0.103  -0.362  -0.249   0.198 
## LIQUOR...UNLAWFUL.POSS.UNDER.21                       0.257         
## DISORDERLY.CONDUCT                   -0.297   0.221  -0.357         
## SUICIDE...POISON.OVERDOSE                            -0.125         
## LITTERING.TRASH.DUMPING               0.174  -0.104  -0.301         
## TRESPASSING                           0.134                  -0.227 
## HARASSMENT.STALKING                          -0.160  -0.141   0.215 
## DRIVING.UNDER.THE.INFLUENCE                  -0.271          -0.225 
## FIRE.OTHER                            0.105   0.154  -0.177  -0.182 
## POL.INFORMATION                      -0.106                         
## LOST.PROPERTY                         0.353   0.296                 
## RECOVERED.PROPERTY.MONT..CO.                 -0.179   0.157  -0.249 
## community_facilities_count                    0.194   0.187  -0.321 
## Number_of_Sales_2014                  0.150                         
## Number_of_Crimes_2014                -0.189          -0.137   0.237 
## IRS_Estimated_Population_2014        -0.102          -0.143         
## Total_Number_of_Sales_State_Planning  0.148  -0.131                 
## List.Price                                                          
## Original.List.Price                                                 
## Close.Price                                                         
## Date.Quarter                                                        
## DOMM                                 -0.122                         
## DOMP                                  0.102                         
## Baths.All                                                           
## Bedrooms                                                            
## Total.Square.Footage                                                
## median_sales_num                     -0.221                         
## mean_sales_num                       -0.108                         
##                                      Comp.27 Comp.28 Comp.29 Comp.30
## ROB.FIREARM...STREET                  0.198   0.212   0.256  -0.495 
## AGG.ASSLT.FIREARM.CITIZEN             0.113  -0.593  -0.190   0.174 
## BURG.FORCE.RES.NIGHT                          0.148   0.250   0.347 
## LARCENY.PICK.POCKET                           0.129  -0.275         
## AUTO.THEFT...PASSENGER.VEHICLE       -0.343           0.213   0.154 
## ASSAULT...BATTERY...CITIZEN                           0.247  -0.250 
## VANDALISM.MOTOR.VEHICLE               0.210          -0.331         
## WEAPON.POSSESSION.HANDGUN             0.217   0.272                 
## SEX.OFFENSE...SEX..ASSAULT                           -0.100         
## drug                                 -0.200   0.115           0.230 
## FAMILY.OFFENSE...ABUSE.CHILD                  0.142  -0.227         
## JUVENILE.RUNAWAY                      0.403  -0.200           0.126 
## LIQUOR...UNLAWFUL.POSS.UNDER.21              -0.115          -0.256 
## DISORDERLY.CONDUCT                    0.216   0.302           0.187 
## SUICIDE...POISON.OVERDOSE                    -0.163   0.141         
## LITTERING.TRASH.DUMPING                              -0.222         
## TRESPASSING                                           0.166   0.407 
## HARASSMENT.STALKING                                          -0.104 
## DRIVING.UNDER.THE.INFLUENCE           0.138  -0.103          -0.102 
## FIRE.OTHER                           -0.143  -0.358                 
## POL.INFORMATION                       0.187                  -0.153 
## LOST.PROPERTY                                -0.117   0.332         
## RECOVERED.PROPERTY.MONT..CO.         -0.211   0.109  -0.411         
## community_facilities_count            0.105  -0.120   0.236         
## Number_of_Sales_2014                          0.119                 
## Number_of_Crimes_2014                -0.403   0.128                 
## IRS_Estimated_Population_2014        -0.157                  -0.209 
## Total_Number_of_Sales_State_Planning  0.255   0.177  -0.117   0.199 
## List.Price                                                          
## Original.List.Price                                                 
## Close.Price                                                         
## Date.Quarter                                                        
## DOMM                                                                
## DOMP                                                                
## Baths.All                                                           
## Bedrooms                                                            
## Total.Square.Footage                                                
## median_sales_num                                                    
## mean_sales_num                        0.230  -0.134           0.124 
##                                      Comp.31 Comp.32 Comp.33 Comp.34
## ROB.FIREARM...STREET                  0.412          -0.157         
## AGG.ASSLT.FIREARM.CITIZEN             0.360  -0.191                 
## BURG.FORCE.RES.NIGHT                          0.170  -0.126         
## LARCENY.PICK.POCKET                                                 
## AUTO.THEFT...PASSENGER.VEHICLE       -0.154  -0.408   0.251         
## ASSAULT...BATTERY...CITIZEN                   0.302   0.201         
## VANDALISM.MOTOR.VEHICLE              -0.258                         
## WEAPON.POSSESSION.HANDGUN            -0.219          -0.111         
## SEX.OFFENSE...SEX..ASSAULT            0.158   0.102                 
## drug                                                  0.236         
## FAMILY.OFFENSE...ABUSE.CHILD         -0.128                         
## JUVENILE.RUNAWAY                     -0.239                         
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.458           0.231         
## DISORDERLY.CONDUCT                    0.286  -0.228   0.121         
## SUICIDE...POISON.OVERDOSE                                           
## LITTERING.TRASH.DUMPING                      -0.111   0.103         
## TRESPASSING                                   0.260  -0.417         
## HARASSMENT.STALKING                                                 
## DRIVING.UNDER.THE.INFLUENCE                                         
## FIRE.OTHER                                    0.411                 
## POL.INFORMATION                               0.164                 
## LOST.PROPERTY                                -0.371                 
## RECOVERED.PROPERTY.MONT..CO.                                        
## community_facilities_count                    0.190                 
## Number_of_Sales_2014                 -0.224                         
## Number_of_Crimes_2014                                -0.185         
## IRS_Estimated_Population_2014                -0.266  -0.414         
## Total_Number_of_Sales_State_Planning  0.195           0.287         
## List.Price                                                   -0.115 
## Original.List.Price                                  -0.100   0.750 
## Close.Price                                                  -0.629 
## Date.Quarter                                                        
## DOMM                                                                
## DOMP                                                                
## Baths.All                                                           
## Bedrooms                                                            
## Total.Square.Footage                                                
## median_sales_num                              0.171   0.362         
## mean_sales_num                       -0.139  -0.106  -0.196         
##                                      Comp.35 Comp.36 Comp.37 Comp.38
## ROB.FIREARM...STREET                                                
## AGG.ASSLT.FIREARM.CITIZEN                            -0.180         
## BURG.FORCE.RES.NIGHT                 -0.182   0.113   0.119         
## LARCENY.PICK.POCKET                  -0.112                         
## AUTO.THEFT...PASSENGER.VEHICLE        0.270                         
## ASSAULT...BATTERY...CITIZEN           0.361  -0.314                 
## VANDALISM.MOTOR.VEHICLE                       0.118                 
## WEAPON.POSSESSION.HANDGUN             0.180          -0.115         
## SEX.OFFENSE...SEX..ASSAULT                                          
## drug                                 -0.275           0.258         
## FAMILY.OFFENSE...ABUSE.CHILD                                        
## JUVENILE.RUNAWAY                                                    
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.192   0.188   0.269         
## DISORDERLY.CONDUCT                   -0.179  -0.160                 
## SUICIDE...POISON.OVERDOSE            -0.127                         
## LITTERING.TRASH.DUMPING                                             
## TRESPASSING                           0.126   0.119  -0.200         
## HARASSMENT.STALKING                                                 
## DRIVING.UNDER.THE.INFLUENCE           0.194          -0.208         
## FIRE.OTHER                           -0.126           0.210         
## POL.INFORMATION                                                     
## LOST.PROPERTY                                                       
## RECOVERED.PROPERTY.MONT..CO.                                        
## community_facilities_count                                          
## Number_of_Sales_2014                 -0.359  -0.527  -0.309         
## Number_of_Crimes_2014                 0.214          -0.188         
## IRS_Estimated_Population_2014        -0.208   0.295   0.149         
## Total_Number_of_Sales_State_Planning  0.398   0.344   0.261         
## List.Price                                                   -0.802 
## Original.List.Price                                           0.299 
## Close.Price                                                   0.509 
## Date.Quarter                                                        
## DOMM                                                                
## DOMP                                                                
## Baths.All                                                           
## Bedrooms                                                            
## Total.Square.Footage                                                
## median_sales_num                              0.346  -0.474         
## mean_sales_num                        0.280  -0.357   0.419         
##                                      Comp.39
## ROB.FIREARM...STREET                  0.187 
## AGG.ASSLT.FIREARM.CITIZEN                   
## BURG.FORCE.RES.NIGHT                  0.139 
## LARCENY.PICK.POCKET                  -0.267 
## AUTO.THEFT...PASSENGER.VEHICLE       -0.202 
## ASSAULT...BATTERY...CITIZEN          -0.343 
## VANDALISM.MOTOR.VEHICLE                     
## WEAPON.POSSESSION.HANDGUN                   
## SEX.OFFENSE...SEX..ASSAULT                  
## drug                                        
## FAMILY.OFFENSE...ABUSE.CHILD                
## JUVENILE.RUNAWAY                            
## LIQUOR...UNLAWFUL.POSS.UNDER.21       0.211 
## DISORDERLY.CONDUCT                          
## SUICIDE...POISON.OVERDOSE                   
## LITTERING.TRASH.DUMPING                     
## TRESPASSING                          -0.172 
## HARASSMENT.STALKING                         
## DRIVING.UNDER.THE.INFLUENCE                 
## FIRE.OTHER                                  
## POL.INFORMATION                             
## LOST.PROPERTY                         0.113 
## RECOVERED.PROPERTY.MONT..CO.                
## community_facilities_count                  
## Number_of_Sales_2014                        
## Number_of_Crimes_2014                 0.620 
## IRS_Estimated_Population_2014        -0.430 
## Total_Number_of_Sales_State_Planning        
## List.Price                                  
## Original.List.Price                         
## Close.Price                                 
## Date.Quarter                                
## DOMM                                        
## DOMP                                        
## Baths.All                                   
## Bedrooms                                    
## Total.Square.Footage                        
## median_sales_num                     -0.106 
## mean_sales_num                              
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.026  0.026  0.026  0.026  0.026  0.026  0.026  0.026
## Cumulative Var  0.026  0.051  0.077  0.103  0.128  0.154  0.179  0.205
##                Comp.9 Comp.10 Comp.11 Comp.12 Comp.13 Comp.14 Comp.15
## SS loadings     1.000   1.000   1.000   1.000   1.000   1.000   1.000
## Proportion Var  0.026   0.026   0.026   0.026   0.026   0.026   0.026
## Cumulative Var  0.231   0.256   0.282   0.308   0.333   0.359   0.385
##                Comp.16 Comp.17 Comp.18 Comp.19 Comp.20 Comp.21 Comp.22
## SS loadings      1.000   1.000   1.000   1.000   1.000   1.000   1.000
## Proportion Var   0.026   0.026   0.026   0.026   0.026   0.026   0.026
## Cumulative Var   0.410   0.436   0.462   0.487   0.513   0.538   0.564
##                Comp.23 Comp.24 Comp.25 Comp.26 Comp.27 Comp.28 Comp.29
## SS loadings      1.000   1.000   1.000   1.000   1.000   1.000   1.000
## Proportion Var   0.026   0.026   0.026   0.026   0.026   0.026   0.026
## Cumulative Var   0.590   0.615   0.641   0.667   0.692   0.718   0.744
##                Comp.30 Comp.31 Comp.32 Comp.33 Comp.34 Comp.35 Comp.36
## SS loadings      1.000   1.000   1.000   1.000   1.000   1.000   1.000
## Proportion Var   0.026   0.026   0.026   0.026   0.026   0.026   0.026
## Cumulative Var   0.769   0.795   0.821   0.846   0.872   0.897   0.923
##                Comp.37 Comp.38 Comp.39
## SS loadings      1.000   1.000   1.000
## Proportion Var   0.026   0.026   0.026
## Cumulative Var   0.949   0.974   1.000
plot(fit,type="lines") # scree plot 

#fit$scores # the principal components
#The biplot is messy because there are so many components. 
#There is likely a better way to plot the biplot.
biplot(fit)

#which factors are most important? - this was useful
#This provides a ranking of the PCA by index
#The index can then be used to choose factors for the linear model
library(psych)
fit <- principal(trainset, nfactors=37, rotate="varimax")
fit # print results
## Principal Components Analysis
## Call: principal(r = trainset, nfactors = 37, rotate = "varimax")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                                        RC1  RC12   RC3   RC2   RC5   RC6
## ROB.FIREARM...STREET                  0.83  0.28 -0.01 -0.08  0.05 -0.02
## AGG.ASSLT.FIREARM.CITIZEN             0.73  0.53  0.18 -0.18 -0.05 -0.01
## BURG.FORCE.RES.NIGHT                  0.52  0.10 -0.35 -0.01  0.30 -0.01
## LARCENY.PICK.POCKET                   0.72 -0.11 -0.09  0.06  0.29 -0.03
## AUTO.THEFT...PASSENGER.VEHICLE        0.71  0.30  0.01 -0.10  0.08 -0.01
## ASSAULT...BATTERY...CITIZEN           0.67  0.49  0.10 -0.19  0.23  0.00
## VANDALISM.MOTOR.VEHICLE               0.54  0.22  0.01 -0.07  0.22 -0.03
## WEAPON.POSSESSION.HANDGUN             0.40  0.60 -0.10 -0.17  0.19 -0.02
## SEX.OFFENSE...SEX..ASSAULT            0.18  0.37  0.07 -0.14  0.22 -0.01
## drug                                  0.58  0.51  0.12 -0.18  0.09 -0.01
## FAMILY.OFFENSE...ABUSE.CHILD          0.42  0.65 -0.02 -0.22 -0.18  0.01
## JUVENILE.RUNAWAY                      0.21  0.61  0.17 -0.20  0.19 -0.01
## LIQUOR...UNLAWFUL.POSS.UNDER.21       0.93  0.11  0.04 -0.08  0.02 -0.01
## DISORDERLY.CONDUCT                    0.87  0.02 -0.06 -0.02  0.11 -0.02
## SUICIDE...POISON.OVERDOSE             0.07  0.05 -0.17  0.00  0.15 -0.02
## LITTERING.TRASH.DUMPING               0.77  0.10 -0.12 -0.03  0.10  0.00
## TRESPASSING                           0.94  0.07  0.00 -0.05  0.16  0.00
## HARASSMENT.STALKING                   0.32 -0.31 -0.08  0.19  0.09 -0.02
## DRIVING.UNDER.THE.INFLUENCE           0.38  0.10 -0.02 -0.08  0.19 -0.01
## FIRE.OTHER                            0.67  0.26  0.13 -0.06  0.23 -0.01
## POL.INFORMATION                       0.42  0.01  0.08 -0.03  0.30  0.01
## LOST.PROPERTY                         0.23 -0.10  0.08  0.04  0.86  0.01
## RECOVERED.PROPERTY.MONT..CO.          0.41  0.04 -0.04  0.01  0.78 -0.02
## community_facilities_count            0.05  0.08  0.66 -0.01 -0.06  0.00
## Number_of_Sales_2014                 -0.06 -0.06  0.98 -0.01  0.04  0.00
## Number_of_Crimes_2014                 0.61  0.21  0.59 -0.10  0.21 -0.01
## IRS_Estimated_Population_2014         0.14  0.13  0.94 -0.07  0.02  0.01
## Total_Number_of_Sales_State_Planning -0.10  0.08  0.97 -0.06 -0.04  0.00
## List.Price                           -0.09 -0.23 -0.05  0.95  0.01  0.06
## Original.List.Price                  -0.08 -0.23 -0.05  0.95  0.01  0.10
## Close.Price                          -0.08 -0.24 -0.05  0.95  0.01  0.04
## Date.Quarter                          0.00  0.01  0.00 -0.01  0.00  0.03
## DOMM                                 -0.02  0.00  0.01  0.07  0.00  0.95
## DOMP                                 -0.02  0.00  0.00  0.12 -0.01  0.94
## Baths.All                            -0.10 -0.05 -0.01  0.29 -0.01  0.04
## Bedrooms                             -0.07 -0.03 -0.08  0.29 -0.04  0.01
## Total.Square.Footage                  0.00 -0.01  0.00  0.02  0.00  0.03
## median_sales_num                     -0.15 -0.89 -0.09  0.35  0.08 -0.01
## mean_sales_num                       -0.19 -0.88 -0.07  0.36  0.05 -0.01
##                                        RC8  RC11  RC13  RC10   RC9  RC24
## ROB.FIREARM...STREET                 -0.03 -0.09 -0.01  0.00  0.00  0.27
## AGG.ASSLT.FIREARM.CITIZEN             0.03 -0.05  0.03  0.00  0.00  0.08
## BURG.FORCE.RES.NIGHT                  0.09  0.14  0.08  0.00  0.00  0.23
## LARCENY.PICK.POCKET                   0.11  0.28  0.16  0.00 -0.01  0.14
## AUTO.THEFT...PASSENGER.VEHICLE        0.07  0.00  0.18  0.00  0.00  0.34
## ASSAULT...BATTERY...CITIZEN           0.17  0.16  0.16  0.00  0.00  0.18
## VANDALISM.MOTOR.VEHICLE               0.19  0.11  0.21  0.00  0.00  0.69
## WEAPON.POSSESSION.HANDGUN             0.16 -0.05  0.10  0.01 -0.01 -0.06
## SEX.OFFENSE...SEX..ASSAULT            0.13  0.10  0.84  0.01  0.01  0.12
## drug                                 -0.02  0.17  0.16  0.00  0.00  0.17
## FAMILY.OFFENSE...ABUSE.CHILD          0.09 -0.04  0.03  0.00 -0.01 -0.11
## JUVENILE.RUNAWAY                      0.03 -0.05  0.14  0.00  0.00  0.26
## LIQUOR...UNLAWFUL.POSS.UNDER.21       0.01  0.16  0.05  0.00  0.01 -0.13
## DISORDERLY.CONDUCT                    0.11  0.30 -0.03 -0.01  0.00  0.02
## SUICIDE...POISON.OVERDOSE             0.96  0.09  0.09 -0.01 -0.01  0.07
## LITTERING.TRASH.DUMPING              -0.09 -0.06 -0.02  0.01 -0.01 -0.01
## TRESPASSING                           0.02  0.02  0.04  0.00  0.00 -0.03
## HARASSMENT.STALKING                   0.03  0.00 -0.03  0.00 -0.01  0.00
## DRIVING.UNDER.THE.INFLUENCE           0.11  0.88  0.09  0.00  0.01  0.05
## FIRE.OTHER                           -0.10  0.15 -0.08  0.01 -0.01  0.13
## POL.INFORMATION                      -0.03  0.06  0.10  0.01  0.00  0.00
## LOST.PROPERTY                         0.03  0.19  0.20  0.01  0.00  0.06
## RECOVERED.PROPERTY.MONT..CO.          0.28  0.03  0.02  0.00  0.00  0.09
## community_facilities_count           -0.01  0.03  0.11  0.00  0.00 -0.01
## Number_of_Sales_2014                 -0.04 -0.01 -0.02  0.00  0.00  0.01
## Number_of_Crimes_2014                 0.00  0.12  0.17  0.00  0.00  0.09
## IRS_Estimated_Population_2014        -0.11  0.01  0.04  0.00  0.00  0.03
## Total_Number_of_Sales_State_Planning -0.04 -0.04 -0.01  0.00  0.00 -0.03
## List.Price                            0.00 -0.02 -0.04 -0.01  0.01 -0.02
## Original.List.Price                   0.00 -0.02 -0.04  0.00  0.01 -0.01
## Close.Price                           0.00 -0.02 -0.04 -0.01  0.01 -0.02
## Date.Quarter                         -0.01  0.00  0.00  1.00 -0.01  0.00
## DOMM                                 -0.01 -0.01  0.00  0.01  0.02 -0.01
## DOMP                                 -0.02  0.00  0.00  0.02  0.01  0.00
## Baths.All                            -0.03 -0.03 -0.02  0.00  0.01 -0.02
## Bedrooms                             -0.03 -0.04 -0.01  0.01  0.00 -0.02
## Total.Square.Footage                 -0.01  0.00  0.00 -0.01  1.00  0.00
## median_sales_num                      0.00 -0.07 -0.12 -0.01  0.00 -0.07
## mean_sales_num                        0.02 -0.06 -0.10 -0.01  0.00 -0.04
##                                       RC14  RC17   RC7   RC4  RC20  RC16
## ROB.FIREARM...STREET                  0.06 -0.03  0.01 -0.03 -0.11  0.11
## AGG.ASSLT.FIREARM.CITIZEN             0.02 -0.03  0.10 -0.01  0.02  0.10
## BURG.FORCE.RES.NIGHT                  0.07 -0.04  0.10  0.00  0.02  0.61
## LARCENY.PICK.POCKET                   0.16 -0.03  0.05 -0.06  0.09  0.04
## AUTO.THEFT...PASSENGER.VEHICLE        0.02 -0.04  0.12 -0.04 -0.03  0.28
## ASSAULT...BATTERY...CITIZEN          -0.10 -0.02  0.02 -0.02  0.01  0.16
## VANDALISM.MOTOR.VEHICLE               0.00 -0.03 -0.01 -0.04 -0.01  0.14
## WEAPON.POSSESSION.HANDGUN             0.07 -0.02  0.04  0.00  0.02  0.07
## SEX.OFFENSE...SEX..ASSAULT           -0.03 -0.02  0.10 -0.01  0.07  0.04
## drug                                  0.07  0.01  0.21 -0.03  0.05 -0.05
## FAMILY.OFFENSE...ABUSE.CHILD         -0.01 -0.02  0.07  0.01  0.13  0.07
## JUVENILE.RUNAWAY                     -0.22 -0.01 -0.18 -0.03  0.04  0.10
## LIQUOR...UNLAWFUL.POSS.UNDER.21       0.01 -0.04  0.09 -0.01  0.06 -0.02
## DISORDERLY.CONDUCT                    0.13 -0.03  0.08 -0.03  0.02  0.00
## SUICIDE...POISON.OVERDOSE             0.02 -0.02 -0.02 -0.03  0.00  0.02
## LITTERING.TRASH.DUMPING               0.02 -0.01  0.03  0.03  0.17  0.18
## TRESPASSING                           0.09 -0.02  0.12 -0.02 -0.04 -0.13
## HARASSMENT.STALKING                   0.86  0.00  0.07  0.00  0.05  0.03
## DRIVING.UNDER.THE.INFLUENCE          -0.01 -0.04  0.05 -0.04  0.01  0.05
## FIRE.OTHER                            0.14 -0.04 -0.03 -0.04 -0.15  0.09
## POL.INFORMATION                       0.08  0.01  0.83 -0.01 -0.04  0.05
## LOST.PROPERTY                         0.06  0.00  0.25 -0.03 -0.03 -0.04
## RECOVERED.PROPERTY.MONT..CO.          0.04 -0.02  0.04 -0.01  0.00  0.20
## community_facilities_count            0.09 -0.01 -0.07  0.02  0.71  0.01
## Number_of_Sales_2014                  0.01 -0.01  0.04 -0.04 -0.05 -0.05
## Number_of_Crimes_2014                -0.02 -0.01  0.11 -0.03  0.19  0.07
## IRS_Estimated_Population_2014        -0.06  0.00  0.01  0.00  0.15  0.06
## Total_Number_of_Sales_State_Planning -0.05  0.00  0.02 -0.03 -0.03 -0.10
## List.Price                            0.05  0.09 -0.01  0.09  0.00 -0.01
## Original.List.Price                   0.05  0.09 -0.01  0.09  0.00 -0.01
## Close.Price                           0.05  0.09 -0.01  0.09 -0.01  0.00
## Date.Quarter                          0.00  0.00  0.00  0.01  0.00  0.00
## DOMM                                 -0.01  0.01  0.00  0.01  0.00  0.00
## DOMP                                 -0.01  0.03  0.00  0.01  0.00  0.00
## Baths.All                             0.00  0.90  0.01  0.30 -0.01 -0.01
## Bedrooms                              0.00  0.31 -0.01  0.89  0.01  0.00
## Total.Square.Footage                  0.00  0.00  0.00  0.00  0.00  0.00
## median_sales_num                      0.12  0.02  0.05  0.02  0.00  0.03
## mean_sales_num                        0.17  0.02 -0.03  0.01  0.01 -0.01
##                                       RC15  RC26  RC27  RC22  RC23  RC19
## ROB.FIREARM...STREET                  0.05  0.12  0.17  0.00  0.06  0.06
## AGG.ASSLT.FIREARM.CITIZEN             0.13  0.02  0.01 -0.03  0.08  0.07
## BURG.FORCE.RES.NIGHT                  0.05  0.05  0.06  0.12  0.05 -0.02
## LARCENY.PICK.POCKET                   0.02  0.10  0.06  0.02 -0.02  0.02
## AUTO.THEFT...PASSENGER.VEHICLE        0.00  0.25  0.15 -0.02 -0.03  0.06
## ASSAULT...BATTERY...CITIZEN           0.02 -0.04  0.07  0.02  0.06  0.08
## VANDALISM.MOTOR.VEHICLE              -0.04  0.04  0.09  0.00 -0.05  0.03
## WEAPON.POSSESSION.HANDGUN             0.57  0.12  0.02  0.07  0.06  0.05
## SEX.OFFENSE...SEX..ASSAULT            0.03 -0.01  0.04 -0.01  0.01  0.02
## drug                                  0.13 -0.05  0.06  0.05  0.06  0.42
## FAMILY.OFFENSE...ABUSE.CHILD          0.09  0.03 -0.03  0.02  0.51  0.03
## JUVENILE.RUNAWAY                      0.03  0.09  0.53  0.02 -0.03  0.03
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.01 -0.13 -0.09 -0.01  0.02 -0.06
## DISORDERLY.CONDUCT                    0.06  0.07  0.00 -0.08 -0.01 -0.12
## SUICIDE...POISON.OVERDOSE             0.03 -0.01  0.00 -0.02  0.02  0.00
## LITTERING.TRASH.DUMPING               0.08  0.00  0.02  0.54  0.02  0.03
## TRESPASSING                          -0.07 -0.08 -0.08  0.02  0.01  0.05
## HARASSMENT.STALKING                   0.02  0.03 -0.05  0.01  0.00  0.01
## DRIVING.UNDER.THE.INFLUENCE          -0.02  0.02 -0.01 -0.01 -0.01  0.02
## FIRE.OTHER                            0.18  0.51  0.09  0.00  0.03 -0.04
## POL.INFORMATION                       0.01  0.00 -0.04  0.01  0.02  0.02
## LOST.PROPERTY                         0.02  0.05 -0.04  0.04 -0.02  0.03
## RECOVERED.PROPERTY.MONT..CO.          0.05  0.00  0.13 -0.01 -0.04 -0.03
## community_facilities_count            0.01 -0.06  0.01  0.07  0.05  0.01
## Number_of_Sales_2014                 -0.02  0.02 -0.03 -0.04 -0.03 -0.01
## Number_of_Crimes_2014                 0.03  0.09  0.09  0.01  0.00  0.10
## IRS_Estimated_Population_2014         0.03  0.04  0.05  0.04  0.05  0.06
## Total_Number_of_Sales_State_Planning -0.04 -0.04  0.01 -0.04 -0.03 -0.04
## List.Price                           -0.02 -0.01 -0.02  0.00 -0.02 -0.01
## Original.List.Price                  -0.02 -0.01 -0.02  0.00 -0.01 -0.01
## Close.Price                          -0.02 -0.01 -0.02  0.00 -0.02 -0.01
## Date.Quarter                          0.00  0.00  0.00  0.00  0.00  0.00
## DOMM                                  0.00 -0.01  0.01  0.00  0.01  0.00
## DOMP                                  0.00  0.00 -0.01  0.00  0.00  0.00
## Baths.All                            -0.01 -0.01  0.00  0.00  0.00  0.00
## Bedrooms                              0.00 -0.01 -0.01  0.01  0.00  0.00
## Total.Square.Footage                  0.00  0.00  0.00  0.00  0.00  0.00
## median_sales_num                      0.01 -0.01 -0.02  0.00  0.01  0.02
## mean_sales_num                        0.02 -0.01  0.02  0.00  0.04  0.02
##                                       RC25  RC21  RC35  RC18  RC29  RC28
## ROB.FIREARM...STREET                  0.05  0.00 -0.08  0.01 -0.03 -0.03
## AGG.ASSLT.FIREARM.CITIZEN            -0.05  0.01 -0.04  0.00  0.01  0.25
## BURG.FORCE.RES.NIGHT                  0.01  0.00  0.00  0.00  0.01  0.01
## LARCENY.PICK.POCKET                   0.43  0.00 -0.01 -0.02  0.00 -0.01
## AUTO.THEFT...PASSENGER.VEHICLE        0.06  0.00 -0.04  0.06 -0.02 -0.05
## ASSAULT...BATTERY...CITIZEN          -0.02  0.00  0.02 -0.05  0.18  0.03
## VANDALISM.MOTOR.VEHICLE               0.02  0.00  0.01  0.00  0.01  0.01
## WEAPON.POSSESSION.HANDGUN             0.01  0.00  0.01  0.00  0.00  0.01
## SEX.OFFENSE...SEX..ASSAULT            0.02  0.00  0.00  0.00  0.00  0.00
## drug                                  0.02  0.00 -0.02 -0.01  0.01  0.01
## FAMILY.OFFENSE...ABUSE.CHILD         -0.01  0.00  0.00  0.00  0.00  0.01
## JUVENILE.RUNAWAY                      0.03  0.00  0.00  0.01  0.01  0.00
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.04  0.00 -0.06  0.02  0.03  0.03
## DISORDERLY.CONDUCT                   -0.03  0.00  0.25  0.00  0.03 -0.04
## SUICIDE...POISON.OVERDOSE             0.01  0.00  0.00  0.00  0.00  0.00
## LITTERING.TRASH.DUMPING               0.01  0.00 -0.01  0.00  0.00  0.00
## TRESPASSING                          -0.06  0.00 -0.04 -0.02 -0.08 -0.04
## HARASSMENT.STALKING                   0.02  0.00  0.00  0.00  0.00  0.00
## DRIVING.UNDER.THE.INFLUENCE           0.02  0.00  0.00  0.00  0.00  0.00
## FIRE.OTHER                            0.05  0.00  0.01 -0.01  0.01  0.01
## POL.INFORMATION                       0.01  0.00  0.00  0.00  0.00  0.00
## LOST.PROPERTY                         0.09  0.00  0.00 -0.16  0.00  0.00
## RECOVERED.PROPERTY.MONT..CO.         -0.08  0.00  0.00  0.23  0.00  0.00
## community_facilities_count            0.02  0.00  0.00  0.00  0.00  0.00
## Number_of_Sales_2014                  0.00  0.00 -0.04 -0.02 -0.05 -0.02
## Number_of_Crimes_2014                 0.10  0.00  0.09  0.05  0.16 -0.01
## IRS_Estimated_Population_2014        -0.05  0.00  0.03  0.02  0.11  0.03
## Total_Number_of_Sales_State_Planning  0.01  0.00  0.00 -0.01 -0.07  0.00
## List.Price                            0.00  0.01  0.00  0.00  0.00  0.00
## Original.List.Price                   0.00 -0.01  0.00  0.00  0.00  0.00
## Close.Price                           0.01  0.01  0.00  0.00  0.00  0.00
## Date.Quarter                          0.00  0.00  0.00  0.00  0.00  0.00
## DOMM                                  0.00 -0.31  0.00  0.00  0.00  0.00
## DOMP                                 -0.01  0.32  0.00  0.00  0.00  0.00
## Baths.All                             0.00  0.00  0.00  0.00  0.00  0.00
## Bedrooms                             -0.01  0.00  0.00  0.00  0.00  0.00
## Total.Square.Footage                  0.00  0.00  0.00  0.00  0.00  0.00
## median_sales_num                      0.00  0.00  0.02  0.00  0.03  0.01
## mean_sales_num                        0.02  0.00 -0.02 -0.02 -0.02  0.02
##                                       RC30  RC32  RC31  RC33  RC34  RC36
## ROB.FIREARM...STREET                  0.22  0.03  0.00  0.00  0.00  0.00
## AGG.ASSLT.FIREARM.CITIZEN            -0.02 -0.02  0.00  0.00  0.00  0.00
## BURG.FORCE.RES.NIGHT                  0.00 -0.01  0.00  0.00  0.00  0.00
## LARCENY.PICK.POCKET                   0.01  0.01  0.00  0.00  0.00  0.00
## AUTO.THEFT...PASSENGER.VEHICLE        0.05  0.18  0.00  0.00  0.00  0.00
## ASSAULT...BATTERY...CITIZEN          -0.02 -0.06  0.02 -0.03  0.00 -0.01
## VANDALISM.MOTOR.VEHICLE               0.00 -0.01  0.00  0.00  0.00  0.00
## WEAPON.POSSESSION.HANDGUN             0.00  0.00  0.00  0.00  0.00  0.00
## SEX.OFFENSE...SEX..ASSAULT            0.00  0.00  0.00  0.00  0.00  0.00
## drug                                  0.01  0.00  0.00  0.00  0.00  0.00
## FAMILY.OFFENSE...ABUSE.CHILD          0.00  0.00  0.00  0.00  0.00  0.00
## JUVENILE.RUNAWAY                      0.01  0.00  0.00  0.00  0.00  0.00
## LIQUOR...UNLAWFUL.POSS.UNDER.21      -0.05 -0.01  0.08  0.01  0.00  0.00
## DISORDERLY.CONDUCT                   -0.04 -0.02  0.00  0.00  0.00  0.00
## SUICIDE...POISON.OVERDOSE             0.00  0.00  0.00  0.00  0.00  0.00
## LITTERING.TRASH.DUMPING               0.00  0.00  0.00  0.00  0.00  0.00
## TRESPASSING                          -0.04 -0.02 -0.08  0.00  0.00  0.00
## HARASSMENT.STALKING                   0.00  0.00  0.00  0.00  0.00  0.00
## DRIVING.UNDER.THE.INFLUENCE           0.00  0.00  0.00  0.00  0.00  0.00
## FIRE.OTHER                            0.00 -0.01  0.00  0.00  0.00  0.00
## POL.INFORMATION                       0.00  0.00  0.00  0.00  0.00  0.00
## LOST.PROPERTY                        -0.01 -0.01  0.00  0.00  0.00  0.00
## RECOVERED.PROPERTY.MONT..CO.          0.01  0.02  0.00  0.00  0.00  0.00
## community_facilities_count            0.00  0.00  0.00  0.00  0.00  0.00
## Number_of_Sales_2014                  0.00  0.00  0.01 -0.01  0.00 -0.05
## Number_of_Crimes_2014                -0.06  0.03 -0.02  0.04  0.00  0.01
## IRS_Estimated_Population_2014        -0.01  0.01  0.01  0.08  0.00  0.02
## Total_Number_of_Sales_State_Planning  0.01 -0.01 -0.01 -0.07  0.00  0.04
## List.Price                            0.00  0.00  0.00  0.00 -0.01  0.00
## Original.List.Price                   0.00  0.00  0.00  0.00  0.07  0.00
## Close.Price                           0.00  0.00  0.00  0.00 -0.06  0.00
## Date.Quarter                          0.00  0.00  0.00  0.00  0.00  0.00
## DOMM                                  0.00  0.00  0.00  0.00  0.00  0.00
## DOMP                                  0.00  0.00  0.00  0.00  0.00  0.00
## Baths.All                             0.00  0.00  0.00  0.00  0.00  0.00
## Bedrooms                              0.00  0.00  0.00  0.00  0.00  0.00
## Total.Square.Footage                  0.00  0.00  0.00  0.00  0.00  0.00
## median_sales_num                      0.00 -0.01  0.02 -0.01  0.00  0.00
## mean_sales_num                       -0.01  0.00 -0.02  0.01  0.00  0.00
##                                       RC37 h2      u2 com
## ROB.FIREARM...STREET                  0.00  1 1.4e-05 2.1
## AGG.ASSLT.FIREARM.CITIZEN             0.00  1 8.8e-07 2.8
## BURG.FORCE.RES.NIGHT                  0.00  1 7.6e-06 4.1
## LARCENY.PICK.POCKET                   0.00  1 2.8e-05 3.2
## AUTO.THEFT...PASSENGER.VEHICLE        0.00  1 1.6e-05 3.4
## ASSAULT...BATTERY...CITIZEN           0.00  1 4.6e-05 3.7
## VANDALISM.MOTOR.VEHICLE               0.00  1 1.5e-06 3.1
## WEAPON.POSSESSION.HANDGUN             0.00  1 1.5e-06 3.8
## SEX.OFFENSE...SEX..ASSAULT            0.00  1 2.1e-08 2.0
## drug                                  0.00  1 6.5e-07 4.6
## FAMILY.OFFENSE...ABUSE.CHILD          0.00  1 1.8e-07 3.6
## JUVENILE.RUNAWAY                      0.00  1 3.5e-07 4.3
## LIQUOR...UNLAWFUL.POSS.UNDER.21       0.00  1 1.8e-05 1.3
## DISORDERLY.CONDUCT                    0.00  1 2.4e-06 1.7
## SUICIDE...POISON.OVERDOSE             0.00  1 2.5e-06 1.2
## LITTERING.TRASH.DUMPING               0.00  1 4.7e-08 2.3
## TRESPASSING                           0.00  1 1.3e-05 1.3
## HARASSMENT.STALKING                   0.00  1 1.1e-07 1.8
## DRIVING.UNDER.THE.INFLUENCE           0.00  1 5.1e-07 1.6
## FIRE.OTHER                            0.00  1 8.6e-07 3.6
## POL.INFORMATION                       0.00  1 1.7e-06 1.9
## LOST.PROPERTY                         0.00  1 5.0e-06 1.8
## RECOVERED.PROPERTY.MONT..CO.          0.00  1 3.1e-06 2.4
## community_facilities_count            0.00  1 6.3e-07 2.2
## Number_of_Sales_2014                  0.00  1 2.0e-06 1.1
## Number_of_Crimes_2014                -0.01  1 1.5e-04 3.8
## IRS_Estimated_Population_2014         0.00  1 7.3e-05 1.3
## Total_Number_of_Sales_State_Planning  0.00  1 1.1e-06 1.1
## List.Price                            0.00  1 1.2e-03 1.2
## Original.List.Price                   0.00  1 1.6e-04 1.2
## Close.Price                           0.00  1 4.7e-04 1.2
## Date.Quarter                          0.00  1 3.6e-10 1.0
## DOMM                                  0.00  1 1.5e-07 1.2
## DOMP                                  0.00  1 1.1e-07 1.3
## Baths.All                             0.00  1 7.1e-10 1.5
## Bedrooms                              0.00  1 3.1e-08 1.5
## Total.Square.Footage                  0.00  1 1.5e-11 1.0
## median_sales_num                     -0.05  1 8.7e-06 1.5
## mean_sales_num                        0.05  1 5.6e-06 1.6
## 
##                        RC1 RC12  RC3  RC2  RC5  RC6  RC8 RC11 RC13 RC10
## SS loadings           8.71 4.33 3.91 3.49 2.11 1.80 1.21 1.18 1.04 1.00
## Proportion Var        0.22 0.11 0.10 0.09 0.05 0.05 0.03 0.03 0.03 0.03
## Cumulative Var        0.22 0.33 0.43 0.52 0.58 0.62 0.66 0.69 0.71 0.74
## Proportion Explained  0.22 0.11 0.10 0.09 0.05 0.05 0.03 0.03 0.03 0.03
## Cumulative Proportion 0.22 0.33 0.43 0.52 0.58 0.62 0.66 0.69 0.71 0.74
##                        RC9 RC24 RC14 RC17  RC7  RC4 RC20 RC16 RC15 RC26
## SS loadings           1.00 0.97 0.97 0.95 0.95 0.94 0.68 0.67 0.43 0.43
## Proportion Var        0.03 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.01 0.01
## Cumulative Var        0.76 0.79 0.81 0.84 0.86 0.89 0.90 0.92 0.93 0.94
## Proportion Explained  0.03 0.02 0.02 0.02 0.02 0.02 0.02 0.02 0.01 0.01
## Cumulative Proportion 0.76 0.79 0.81 0.84 0.86 0.89 0.90 0.92 0.93 0.94
##                       RC27 RC22 RC23 RC19 RC25 RC21 RC35 RC18 RC29 RC28
## SS loadings           0.42 0.34 0.31 0.24 0.23 0.20 0.09 0.09 0.09 0.07
## Proportion Var        0.01 0.01 0.01 0.01 0.01 0.01 0.00 0.00 0.00 0.00
## Cumulative Var        0.95 0.96 0.97 0.98 0.98 0.99 0.99 0.99 0.99 1.00
## Proportion Explained  0.01 0.01 0.01 0.01 0.01 0.01 0.00 0.00 0.00 0.00
## Cumulative Proportion 0.95 0.96 0.97 0.98 0.98 0.99 0.99 0.99 0.99 1.00
##                       RC30 RC32 RC31 RC33 RC34 RC36 RC37
## SS loadings           0.06 0.04 0.01 0.01 0.01 0.01    0
## Proportion Var        0.00 0.00 0.00 0.00 0.00 0.00    0
## Cumulative Var        1.00 1.00 1.00 1.00 1.00 1.00    1
## Proportion Explained  0.00 0.00 0.00 0.00 0.00 0.00    0
## Cumulative Proportion 1.00 1.00 1.00 1.00 1.00 1.00    1
## 
## Mean item complexity =  2.2
## Test of the hypothesis that 37 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0 
##  with the empirical chi square  0.02  with prob <  NA 
## 
## Fit based upon off diagonal values = 1
#linear model with numeric data from top PCA factors - Model 7
#final model AIC 232728.8 - not an improvement over baseline
mylogit <- lm(
  as.formula(paste(colnames(trainset)[31], "~",
                   paste(colnames(trainset)[c(1,16,3,2,5,6,8,10,13,24)], collapse = "+"),
                   sep = ""
  )),
  data=trainset
)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=232747.2
## Close.Price ~ ROB.FIREARM...STREET + LITTERING.TRASH.DUMPING + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN + AUTO.THEFT...PASSENGER.VEHICLE + 
##     ASSAULT...BATTERY...CITIZEN + WEAPON.POSSESSION.HANDGUN + 
##     drug + LIQUOR...UNLAWFUL.POSS.UNDER.21 + community_facilities_count
## 
##                                   Df  Sum of Sq        RSS    AIC
## - drug                             1 7.1020e+10 1.1388e+15 232746
## <none>                                          1.1387e+15 232747
## - LITTERING.TRASH.DUMPING          1 1.6920e+12 1.1404e+15 232759
## - WEAPON.POSSESSION.HANDGUN        1 3.2981e+12 1.1420e+15 232772
## - AUTO.THEFT...PASSENGER.VEHICLE   1 4.0868e+12 1.1428e+15 232778
## - community_facilities_count       1 1.2830e+13 1.1515e+15 232847
## - ROB.FIREARM...STREET             1 1.3874e+13 1.1526e+15 232856
## - AGG.ASSLT.FIREARM.CITIZEN        1 1.4324e+13 1.1530e+15 232859
## - LIQUOR...UNLAWFUL.POSS.UNDER.21  1 1.7837e+13 1.1565e+15 232887
## - BURG.FORCE.RES.NIGHT             1 3.3992e+13 1.1727e+15 233013
## - ASSAULT...BATTERY...CITIZEN      1 6.0330e+13 1.1990e+15 233215
## 
## Step:  AIC=232745.8
## Close.Price ~ ROB.FIREARM...STREET + LITTERING.TRASH.DUMPING + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN + AUTO.THEFT...PASSENGER.VEHICLE + 
##     ASSAULT...BATTERY...CITIZEN + WEAPON.POSSESSION.HANDGUN + 
##     LIQUOR...UNLAWFUL.POSS.UNDER.21 + community_facilities_count
## 
##                                   Df  Sum of Sq        RSS    AIC
## <none>                                          1.1388e+15 232746
## + drug                             1 7.1020e+10 1.1387e+15 232747
## - LITTERING.TRASH.DUMPING          1 1.7932e+12 1.1405e+15 232758
## - WEAPON.POSSESSION.HANDGUN        1 3.6474e+12 1.1424e+15 232773
## - AUTO.THEFT...PASSENGER.VEHICLE   1 4.3407e+12 1.1431e+15 232778
## - community_facilities_count       1 1.2782e+13 1.1515e+15 232845
## - ROB.FIREARM...STREET             1 1.3849e+13 1.1526e+15 232854
## - AGG.ASSLT.FIREARM.CITIZEN        1 1.4856e+13 1.1536e+15 232862
## - LIQUOR...UNLAWFUL.POSS.UNDER.21  1 1.7975e+13 1.1567e+15 232886
## - BURG.FORCE.RES.NIGHT             1 3.7626e+13 1.1764e+15 233040
## - ASSAULT...BATTERY...CITIZEN      1 7.5101e+13 1.2139e+15 233325
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## Close.Price ~ ROB.FIREARM...STREET + LITTERING.TRASH.DUMPING + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN + AUTO.THEFT...PASSENGER.VEHICLE + 
##     ASSAULT...BATTERY...CITIZEN + WEAPON.POSSESSION.HANDGUN + 
##     drug + LIQUOR...UNLAWFUL.POSS.UNDER.21 + community_facilities_count
## 
## Final Model:
## Close.Price ~ ROB.FIREARM...STREET + LITTERING.TRASH.DUMPING + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN + AUTO.THEFT...PASSENGER.VEHICLE + 
##     ASSAULT...BATTERY...CITIZEN + WEAPON.POSSESSION.HANDGUN + 
##     LIQUOR...UNLAWFUL.POSS.UNDER.21 + community_facilities_count
## 
## 
##     Step Df    Deviance Resid. Df   Resid. Dev      AIC
## 1                            9097 1.138679e+15 232747.2
## 2 - drug  1 71019914818      9098 1.138750e+15 232745.8
plot(mylogit)

#
#
#
#
#
#predict house price
devset2$predicted_close_price7<-predict(mylogit, devset2)

# how far off are the predicted prices
devset2$difference7 <- devset2$predicted_close_price7 - devset2$Close.Price
devset2$percent_error7 <- abs(devset2$difference7/devset2$Close.Price)

#see the indices for creating the model by index
names(trainset)
##  [1] "ROB.FIREARM...STREET"                
##  [2] "AGG.ASSLT.FIREARM.CITIZEN"           
##  [3] "BURG.FORCE.RES.NIGHT"                
##  [4] "LARCENY.PICK.POCKET"                 
##  [5] "AUTO.THEFT...PASSENGER.VEHICLE"      
##  [6] "ASSAULT...BATTERY...CITIZEN"         
##  [7] "VANDALISM.MOTOR.VEHICLE"             
##  [8] "WEAPON.POSSESSION.HANDGUN"           
##  [9] "SEX.OFFENSE...SEX..ASSAULT"          
## [10] "drug"                                
## [11] "FAMILY.OFFENSE...ABUSE.CHILD"        
## [12] "JUVENILE.RUNAWAY"                    
## [13] "LIQUOR...UNLAWFUL.POSS.UNDER.21"     
## [14] "DISORDERLY.CONDUCT"                  
## [15] "SUICIDE...POISON.OVERDOSE"           
## [16] "LITTERING.TRASH.DUMPING"             
## [17] "TRESPASSING"                         
## [18] "HARASSMENT.STALKING"                 
## [19] "DRIVING.UNDER.THE.INFLUENCE"         
## [20] "FIRE.OTHER"                          
## [21] "POL.INFORMATION"                     
## [22] "LOST.PROPERTY"                       
## [23] "RECOVERED.PROPERTY.MONT..CO."        
## [24] "community_facilities_count"          
## [25] "Number_of_Sales_2014"                
## [26] "Number_of_Crimes_2014"               
## [27] "IRS_Estimated_Population_2014"       
## [28] "Total_Number_of_Sales_State_Planning"
## [29] "List.Price"                          
## [30] "Original.List.Price"                 
## [31] "Close.Price"                         
## [32] "Date.Quarter"                        
## [33] "DOMM"                                
## [34] "DOMP"                                
## [35] "Baths.All"                           
## [36] "Bedrooms"                            
## [37] "Total.Square.Footage"                
## [38] "median_sales_num"                    
## [39] "mean_sales_num"
#what if we use the PCA to give us a model
#model with PCA columns
mylogit <- lm(
  as.formula(paste(colnames(trainset)[29], "~",
                   paste(colnames(trainset)[c(1, 11, 3, 2)], collapse = "+"),
                   sep = ""
  )),
  data=trainset
)
step<-stepAIC(mylogit, direction = "both")
## Start:  AIC=234184.9
## List.Price ~ ROB.FIREARM...STREET + FAMILY.OFFENSE...ABUSE.CHILD + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN
## 
##                                Df  Sum of Sq        RSS    AIC
## <none>                                       1.3351e+15 234185
## - BURG.FORCE.RES.NIGHT          1 3.6885e+12 1.3388e+15 234208
## - ROB.FIREARM...STREET          1 5.4312e+12 1.3406e+15 234220
## - AGG.ASSLT.FIREARM.CITIZEN     1 2.8118e+13 1.3633e+15 234373
## - FAMILY.OFFENSE...ABUSE.CHILD  1 4.4437e+13 1.3796e+15 234481
step$anova
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## List.Price ~ ROB.FIREARM...STREET + FAMILY.OFFENSE...ABUSE.CHILD + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN
## 
## Final Model:
## List.Price ~ ROB.FIREARM...STREET + FAMILY.OFFENSE...ABUSE.CHILD + 
##     BURG.FORCE.RES.NIGHT + AGG.ASSLT.FIREARM.CITIZEN
## 
## 
##   Step Df Deviance Resid. Df   Resid. Dev      AIC
## 1                       9103 1.335134e+15 234184.9
plot(mylogit)

#
#
#
#
#
#predict house price
devset2$predicted_close_price8<-predict(mylogit, devset2)

# how far off are the predicted prices
devset2$difference8 <- devset2$predicted_close_price8 - devset2$Close.Price
devset2$percent_error8 <- abs(devset2$difference8/devset2$Close.Price)


#Non Graphical Solutions to Scree Test
#need more understanding of this concept in order to interpret this plot
library(nFactors)
ev <- eigen(cor(trainset)) # get eigenvalues
ap <- parallel(subject=nrow(trainset),var=ncol(trainset),
               rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

#run PCA again but with fewer factors to get a biplot that is readable
new_trainset <- (trainset)[c(1, 11, 3, 2, 29)]
fit <- princomp(new_trainset, cor=TRUE)
summary(fit) # print variance accounted for 
## Importance of components:
##                          Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     1.712794 1.0099408 0.7672067 0.58367303 0.34216120
## Proportion of Variance 0.586733 0.2039961 0.1177212 0.06813484 0.02341486
## Cumulative Proportion  0.586733 0.7907291 0.9084503 0.97658514 1.00000000
#loadings(fit) # pc loadings 
plot(fit,type="lines") # scree plot 

#fit$scores # the principal components
biplot(fit)

#attempt at more readable biplot, did not work
biplot(fit, expand=10, xlim=c(-0.30, 0.0), ylim=c(-0.1, 0.1))

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.